pax_global_header00006660000000000000000000000064133371564370014526gustar00rootroot0000000000000052 comment=ff0672dc79af994b1ba4943886326d3c567cfae4 sleef-3.3.1/000077500000000000000000000000001333715643700126305ustar00rootroot00000000000000sleef-3.3.1/.travis.yml000066400000000000000000000041761333715643700147510ustar00rootroot00000000000000language: c # Default linux jobs os: linux sudo: required dist: trusty # Include osx jobs matrix: include: - os: osx osx_image: xcode8 compiler: gcc env: - LABEL="osx-gcc" - os: osx compiler: clang # use default apple clang env: - LABEL="osx-clang" - os: linux addons: apt: sources: - ubuntu-toolchain-r-test packages: - g++-7 env: - LABEL="x86_64-gcc" - os: linux addons: apt: sources: - llvm-toolchain-trusty-5.0 packages: - clang-5.0 env: - LABEL="x86_64-clang" - os: linux services: docker env: - LABEL="aarch64-gcc" - ENABLE_DOCKER="true" - os: linux services: docker env: - LABEL="armhf-gcc" - ENABLE_DOCKER="true" - os: linux services: docker env: - LABEL="ppc64el-clang" - ENABLE_DOCKER="true" before_install: - export PATH=$PATH:/usr/bin:${TRAVIS_BUILD_DIR}/sde-external-8.12.0-2017-10-23-lin - cd ${TRAVIS_BUILD_DIR} - chmod +x ${TRAVIS_BUILD_DIR}/travis/*.sh - if [[ "${ENABLE_DOCKER}" == "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/setupdocker.sh; fi - if [[ "${ENABLE_DOCKER}" == "true" ]]; then docker exec xenial /build/travis/before_install.${LABEL}.sh; fi - if [[ ( "${LABEL}" == "x86_64-gcc" ) && ( "{SDE_URL}" != "" ) ]]; then wget -q ${SDE_URL}; fi - if [[ ( "${LABEL}" == "x86_64-clang" ) && ( "{SDE_URL}" != "" ) ]]; then wget -q ${SDE_URL}; fi - if [[ "${ENABLE_DOCKER}" != "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/before_install.${LABEL}.sh; fi before_script: - if [[ "${ENABLE_DOCKER}" == "true" ]]; then docker exec xenial /build/travis/before_script.${LABEL}.sh; fi - if [[ "${ENABLE_DOCKER}" != "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/before_script.${LABEL}.sh; fi script: - if [[ "${ENABLE_DOCKER}" == "true" ]]; then 
docker exec xenial /build/travis/script.${LABEL}.sh; fi - if [[ "${ENABLE_DOCKER}" != "true" ]]; then ${TRAVIS_BUILD_DIR}/travis/script.${LABEL}.sh; fi sleef-3.3.1/CHANGELOG.md000066400000000000000000000050131333715643700144400ustar00rootroot00000000000000# Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). ## 3.3.1 - 2018-08-20 ### Added - FreeBSD support is added ### Changed - i386 build problem is fixed - Trigonometric functions now evaluate correctly with full FP domain. https://github.com/shibatch/sleef/pull/210">#210) ## 3.3 - 2018-07-06 ### Added - SVE target support is added to libsleef. https://github.com/shibatch/sleef/pull/180 - SVE target support is added to DFT. With this patch, DFT operations can be carried out using 256, 512, 1024 and 2048-bit wide vectors according to runtime availability of vector registers and operators. https://github.com/shibatch/sleef/pull/182 - 3.5-ULP versions of sinh, cosh, tanh, sinhf, coshf, tanhf, and the corresponding testing functionalities are added. https://github.com/shibatch/sleef/pull/192 - Power VSX target support is added to libsleef. https://github.com/shibatch/sleef/pull/195 - Payne-Hanek like argument reduction is added to libsleef. https://github.com/shibatch/sleef/pull/197 ## 3.2 - 2018-02-26 ### Added - The whole build system of the project migrated from makefiles to cmake. In particualr this includes `libsleef`, `libsleefgnuabi`, `libdft` and all the tests. - Benchmarks that compare `libsleef` vs `SVML` on X86 Linux are available in the project tree under src/libm-benchmarks directory. - Extensive upstream testing via Travis CI and Appveyor, on the following systems: * OS: Windows / Linux / OSX. * Compilers: gcc / clang / MSVC. * Targets: X86 (SSE/AVX/AVX2/AVX512F), AArch64 (Advanced SIMD), ARM (NEON). 
Emulators like QEMU or SDE can be used to run the tests. - Added the following new vector functions (with relative testing): * `log2` - New compatibility tests have been added to check that `libsleefgnuabi` exports the GNUABI symbols correctly. - The library can be compiled to an LLVM bitcode object. - Added masked interface to the library to support AVX512F masked vectorization. ### Changed - Use native instructions if available for `sqrt`. - Fixed fmax and fmin behavior on AArch64: https://github.com/shibatch/sleef/pull/140 - Speed improvements for `asin`, `acos`, `fmod` and `log`. Computation speed of other functions are also improved by general optimization. https://github.com/shibatch/sleef/pull/97 - Removed `libm` dependency. ### Removed - Makefile build system sleef-3.3.1/CMakeLists.txt000066400000000000000000000143141333715643700153730ustar00rootroot00000000000000# Options option(BUILD_SHARED_LIBS "Build shared libs" ON) option(BUILD_DFT "libsleefdft will be built." ON) option(BUILD_GNUABI_LIBS "libsleefgnuabi will be built." ON) option(BUILD_TESTS "Tests will be built." ON) option(SLEEF_TEST_ALL_IUT "Perform tests on implementations with all vector extensions" OFF) option(SLEEF_SHOW_CONFIG "Show SLEEF configuration status messages." ON) option(SLEEF_SHOW_ERROR_LOG "Show cmake error log." OFF) # See doc/build-with-cmake.md for instructions on how to build Sleef. cmake_minimum_required(VERSION 3.4.3) enable_testing() set(SLEEF_VERSION_MAJOR 3) set(SLEEF_VERSION_MINOR 3) set(SLEEF_VERSION_PATCHLEVEL 1) set(SLEEF_VERSION ${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}.${SLEEF_VERSION_PATCHLEVEL}) set(SLEEF_SOVERSION ${SLEEF_VERSION_MAJOR}) project(SLEEF VERSION ${SLEEF_VERSION} LANGUAGES C) # Sanity check for in-source builds which we do not want to happen if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) message(FATAL_ERROR "SLEEF does not allow in-source builds. You can refer to doc/build-with-cmake.md for instructions on how provide a \ separate build directory. 
Note: Please remove autogenerated file \ `CMakeCache.txt` and directory `CMakeFiles` in the current directory.") endif() # Set output directories for the library files set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) foreach(CONFIG ${CMAKE_CONFIGURATION_TYPES}) string(TOUPPER ${CONFIG} CONFIG) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/lib) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/lib) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_${CONFIG} ${PROJECT_BINARY_DIR}/bin) endforeach(CONFIG CMAKE_CONFIGURATION_TYPES) # Path for finding cmake modules set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules) set(SLEEF_SCRIPT_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Scripts CACHE PATH "Path for finding sleef specific cmake scripts") # sleef-config.h.in passes cmake settings to the source code include(Configure.cmake) configure_file( ${PROJECT_SOURCE_DIR}/sleef-config.h.in ${PROJECT_BINARY_DIR}/include/sleef-config.h @ONLY) # We like to have a documented index of all targets in the project. The # variables listed below carry the names of the targets defined throughout # the project. 
# Generates object file (shared library) `libsleef` # Defined in src/libm/CMakeLists.txt via command add_library set(TARGET_LIBSLEEF "sleef") set(TARGET_LIBSLEEFGNUABI "sleefgnuabi") # Generates the sleef.h headers and all the rename headers # Defined in src/libm/CMakeLists.txt via custom commands and a custom target set(TARGET_HEADERS "headers") # Generates executable files for running the test suite # Defined in src/libm-tester/CMakeLists.txt via command add_executable set(TARGET_TESTER "tester") set(TARGET_IUT "iut") set(TARGET_IUTSSE2 "iutsse2") set(TARGET_IUTSSE4 "iutsse4") set(TARGET_IUTAVX "iutavx") set(TARGET_IUTFMA4 "iutfma4") set(TARGET_IUTAVX2 "iutavx2") set(TARGET_IUTAVX2128 "iutavx2128") set(TARGET_IUTAVX512F "iutavx512f") set(TARGET_IUTADVSIMD "iutadvsimd") set(TARGET_IUTNEON32 "iutneon32") set(TARGET_IUTSVE "iutsve") set(TARGET_IUTVSX "iutvsx") # The target to generate LLVM bitcode only, available when SLEEF_ENABLE_LLVM_BITCODE is passed to cmake set(TARGET_LLVM_BITCODE "llvm-bitcode") # Generates the helper executable file mkrename needed to write the sleef header set(TARGET_MKRENAME "mkrename") set(TARGET_MKRENAME_GNUABI "mkrename_gnuabi") set(TARGET_MKMASKED_GNUABI "mkmasked_gnuabi") # Generates the helper executable file mkdisp needed to write the sleef header set(TARGET_MKDISP "mkdisp") set(TARGET_MKALIAS "mkalias") # Generates static library common # Defined in src/common/CMakeLists.txt via command add_library set(TARGET_LIBCOMMON_OBJ "common") set(TARGET_LIBARRAYMAP_OBJ "arraymap") # Function used to add an executable that is executed on host function(add_host_executable TARGETNAME) if (NOT CMAKE_CROSSCOMPILING) add_executable(${TARGETNAME} ${ARGN}) else() add_executable(${TARGETNAME} IMPORTED) set_property(TARGET ${TARGETNAME} PROPERTY IMPORTED_LOCATION ${NATIVE_BUILD_DIR}/bin/${TARGETNAME}) endif() endfunction() # Generates object file (shared library) `libsleefdft` # Defined in src/dft/CMakeLists.txt via command add_library 
set(TARGET_LIBDFT "sleefdft") # Check subdirectories add_subdirectory("src") # Extra messages at configuration time. By default is active, it can be # turned off by invoking cmake with "-DSLEEF_SHOW_CONFIG=OFF". if(SLEEF_SHOW_CONFIG) message(STATUS "Configuring build for ${PROJECT_NAME}-v${SLEEF_VERSION}") message(" Target system: ${CMAKE_SYSTEM}") message(" Target processor: ${CMAKE_SYSTEM_PROCESSOR}") message(" Host system: ${CMAKE_HOST_SYSTEM}") message(" Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}") message(" Detected C compiler: ${CMAKE_C_COMPILER_ID} @ ${CMAKE_C_COMPILER}") if(CMAKE_CROSSCOMPILING) message(" Crosscompiling SLEEF.") message(" Native build dir: ${NATIVE_BUILD_DIR}") endif(CMAKE_CROSSCOMPILING) message(STATUS "Using option `${SLEEF_C_FLAGS}` to compile libsleef") message(STATUS "Building shared libs : " ${BUILD_SHARED_LIBS}) message(STATUS "MPFR : " ${LIB_MPFR}) if (MPFR_INCLUDE_DIR) message(STATUS "MPFR header file in " ${MPFR_INCLUDE_DIR}) endif() message(STATUS "GMP : " ${LIBGMP}) message(STATUS "RT : " ${LIBRT}) message(STATUS "FFTW3 : " ${LIBFFTW3}) message(STATUS "SDE : " ${SDE_COMMAND}) message(STATUS "RUNNING_ON_TRAVIS : " ${RUNNING_ON_TRAVIS}) message(STATUS "COMPILER_SUPPORTS_OPENMP : " ${COMPILER_SUPPORTS_OPENMP}) if(ENABLE_GNUABI) message(STATUS "A version of SLEEF compatible with libm and libmvec in GNU libc will be produced (${TARGET_LIBSLEEFGNUABI}.so)") endif() if (COMPILER_SUPPORTS_SVE) message(STATUS "Building SLEEF with VLA SVE support") if (ARMIE_COMMAND) message(STATUS "Arm Instruction Emulator found at ${ARMIE_COMMAND}") message(STATUS "SVE testing is done with ${SVE_VECTOR_BITS}-bits vectors.") endif() endif() endif(SLEEF_SHOW_CONFIG) if (MSVC) message("") message("*** Note: Parallel build is not supported on Microsoft Visual Studio") endif() sleef-3.3.1/CONTRIBUTORS.md000066400000000000000000000013431333715643700151100ustar00rootroot00000000000000# List of contributors | Name | Affiliation | Github profile | | 
-------------------- | ----------------------- | ---------------------------------- | | Naoki Shibata | Nara Institute of Science and Technology | https://github.com/shibatch | | Jilayne Lovejoy | Arm Inc. | https://github.com/jlovejoy | | Francesco Petrogalli | Arm Ltd. | https://github.com/fpetrogalli-arm | | Diana Bite | Arm Ltd. | https://github.com/diaena | | Alexandre Mutel | Unity Technologies | https://github.com/xoofx | | Martin Krastev | Chaos Group | https://github.com/blu | sleef-3.3.1/Configure.cmake000066400000000000000000000411561333715643700155620ustar00rootroot00000000000000include(CheckCCompilerFlag) include(CheckCSourceCompiles) include(CheckTypeSize) # Some toolchains require explicit linking of the libraries following. find_library(LIB_MPFR mpfr) find_library(LIBM m) find_library(LIBGMP gmp) find_library(LIBRT rt) find_library(LIBFFTW3 fftw3) find_path(MPFR_INCLUDE_DIR NAMES mpfr.h ONLY_CMAKE_FIND_ROOT_PATH) find_path(FFTW3_INCLUDE_DIR NAMES fftw3.h ONLY_CMAKE_FIND_ROOT_PATH) if (NOT LIBM) set(LIBM "") endif() if (NOT LIBRT) set(LIBRT "") endif() # The library currently supports the following SIMD architectures set(SLEEF_SUPPORTED_EXTENSIONS AVX512F AVX2 AVX2128 FMA4 AVX SSE4 SSE2 # x86 ADVSIMD SVE # Aarch64 NEON32 # Aarch32 VSX # PPC64 CACHE STRING "List of SIMD architectures supported by libsleef." ) set(SLEEF_SUPPORTED_GNUABI_EXTENSIONS SSE2 AVX AVX2 AVX512F ADVSIMD SVE CACHE STRING "List of SIMD architectures supported by libsleef for GNU ABI." ) # Force set default build type if none was specified # Note: some sleef code requires the optimisation flags turned on if(NOT CMAKE_BUILD_TYPE) message(STATUS "Setting build type to 'Release' (required for full support).") set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." 
FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "RelWithDebInfo" "MinSizeRel") endif() # Function used to generate safe command arguments for add_custom_command function(command_arguments PROPNAME) set(quoted_args "") foreach(arg ${ARGN}) list(APPEND quoted_args "\"${arg}\"" ) endforeach() set(${PROPNAME} ${quoted_args} PARENT_SCOPE) endfunction() # PLATFORM DETECTION if((CMAKE_SYSTEM_PROCESSOR MATCHES "x86") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$")) set(SLEEF_ARCH_X86 ON CACHE INTERNAL "True for x86 architecture.") set(SLEEF_HEADER_LIST SSE_ SSE2 SSE4 AVX_ AVX FMA4 AVX2 AVX2128 AVX512F_ AVX512F ) command_arguments(HEADER_PARAMS_SSE_ 2 4 __m128d __m128 __m128i __m128i __SSE2__) command_arguments(HEADER_PARAMS_SSE2 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse2) command_arguments(HEADER_PARAMS_SSE4 2 4 __m128d __m128 __m128i __m128i __SSE2__ sse4) command_arguments(HEADER_PARAMS_AVX_ 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__) command_arguments(HEADER_PARAMS_AVX 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__ avx) command_arguments(HEADER_PARAMS_FMA4 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__ fma4) command_arguments(HEADER_PARAMS_AVX2 4 8 __m256d __m256 __m128i __m256i __AVX__ avx2) command_arguments(HEADER_PARAMS_AVX2128 2 4 __m128d __m128 __m128i __m128i __SSE2__ avx2128) command_arguments(HEADER_PARAMS_AVX512F_ 8 16 __m512d __m512 __m256i __m512i __AVX512F__) command_arguments(HEADER_PARAMS_AVX512F 8 16 __m512d __m512 __m256i __m512i __AVX512F__ avx512f) command_arguments(ALIAS_PARAMS_AVX512F_DP 8 __m512d __m256i e avx512f) command_arguments(ALIAS_PARAMS_AVX512F_SP -16 __m512 __m512i e avx512f) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") set(SLEEF_ARCH_AARCH64 ON CACHE INTERNAL "True for Aarch64 architecture.") # Aarch64 requires support for advsimdfma4 set(COMPILER_SUPPORTS_ADVSIMD 1) 
set(SLEEF_HEADER_LIST ADVSIMD_ ADVSIMD SVE ) command_arguments(HEADER_PARAMS_ADVSIMD_ 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON) command_arguments(HEADER_PARAMS_ADVSIMD 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON advsimd) command_arguments(HEADER_PARAMS_SVE 2 4 svfloat64_t svfloat32_t svint32_t svint32_t __ARM_FEATURE_SVE sve) command_arguments(ALIAS_PARAMS_ADVSIMD_DP 2 float64x2_t int32x2_t n advsimd) command_arguments(ALIAS_PARAMS_ADVSIMD_SP -4 float32x4_t int32x4_t n advsimd) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") set(SLEEF_ARCH_AARCH32 ON CACHE INTERNAL "True for Aarch32 architecture.") set(COMPILER_SUPPORTS_NEON32 1) set(SLEEF_HEADER_LIST NEON32_ NEON32 ) command_arguments(HEADER_PARAMS_NEON32_ 2 4 - float32x4_t int32x2_t int32x4_t __ARM_NEON__) command_arguments(HEADER_PARAMS_NEON32 2 4 - float32x4_t int32x2_t int32x4_t __ARM_NEON__ neon) command_arguments(ALIAS_PARAMS_NEON32_SP -4 float32x4_t int32x4_t - neon) command_arguments(ALIAS_PARAMS_NEON32_DP 0) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") set(SLEEF_ARCH_PPC64 ON CACHE INTERNAL "True for PPC64 architecture.") set(SLEEF_HEADER_LIST VSX_ VSX ) set(HEADER_PARAMS_VSX 2 4 "vector double" "vector float" "vector int" "vector int" __VSX__ vsx) set(HEADER_PARAMS_VSX_ 2 4 "vector double" "vector float" "vector int" "vector int" __VSX__ vsx) set(ALIAS_PARAMS_VSX_DP 2 "vector double" "vector int" - vsx) set(ALIAS_PARAMS_VSX_SP -4 "vector float" "vector int" - vsx) endif() # MKRename arguments per type command_arguments(RENAME_PARAMS_SSE2 2 4 sse2) command_arguments(RENAME_PARAMS_SSE4 2 4 sse4) command_arguments(RENAME_PARAMS_AVX 4 8 avx) command_arguments(RENAME_PARAMS_FMA4 4 8 fma4) command_arguments(RENAME_PARAMS_AVX2 4 8 avx2) command_arguments(RENAME_PARAMS_AVX2128 2 4 avx2128) command_arguments(RENAME_PARAMS_AVX512F 8 16 avx512f) command_arguments(RENAME_PARAMS_ADVSIMD 2 4 advsimd) command_arguments(RENAME_PARAMS_NEON32 2 4 neon) 
command_arguments(RENAME_PARAMS_VSX 2 4 vsx) # The vector length parameters in SVE, for SP and DP, are chosen for # the smallest SVE vector size (128-bit). The name is generated using # the "x" token of VLA SVE vector functions. command_arguments(RENAME_PARAMS_SVE 2 4 sve) command_arguments(RENAME_PARAMS_GNUABI_SSE2 sse2 b 2 4 _mm128d _mm128 _mm128i _mm128i __SSE2__) command_arguments(RENAME_PARAMS_GNUABI_AVX avx c 4 8 __m256d __m256 __m128i "struct { __m128i x, y$ }" __AVX__) command_arguments(RENAME_PARAMS_GNUABI_AVX2 avx2 d 4 8 __m256d __m256 __m128i __m256i __AVX2__) command_arguments(RENAME_PARAMS_GNUABI_AVX512F avx512f e 8 16 __m512d __m512 __m256i __m512i __AVX512F__) command_arguments(RENAME_PARAMS_GNUABI_ADVSIMD advsimd n 2 4 float64x2_t float32x4_t int32x2_t int32x4_t __ARM_NEON) # The vector length parameters in SVE, for SP and DP, are chosen for # the smallest SVE vector size (128-bit). The name is generated using # the "x" token of VLA SVE vector functions. command_arguments(RENAME_PARAMS_GNUABI_SVE sve s 2 4 svfloat64_t svfloat32_t svint32_t svint32_t __ARM_SVE) command_arguments(MKMASKED_PARAMS_GNUABI_AVX512F_dp avx512f e 8) command_arguments(MKMASKED_PARAMS_GNUABI_AVX512F_sp avx512f e -16) command_arguments(MKMASKED_PARAMS_GNUABI_SVE_dp sve s 2) command_arguments(MKMASKED_PARAMS_GNUABI_SVE_sp sve s -4) # COMPILER DETECTION # Detect CLANG executable path (on both Windows and Linux/OSX) if(NOT CLANG_EXE_PATH) # If the current compiler used by CMAKE is already clang, use this one directly if(CMAKE_C_COMPILER MATCHES "clang") set(CLANG_EXE_PATH ${CMAKE_C_COMPILER}) else() # Else we may find clang on the path? find_program(CLANG_EXE_PATH NAMES clang "clang-5.0" "clang-4.0" "clang-3.9") endif() endif() # Allow to define the Gcc/Clang here # As we might compile the lib with MSVC, but generates bitcode with CLANG # Intel vector extensions. 
set(CLANG_FLAGS_ENABLE_SSE2 "-msse2") set(CLANG_FLAGS_ENABLE_SSE4 "-msse4.1") set(CLANG_FLAGS_ENABLE_AVX "-mavx") set(CLANG_FLAGS_ENABLE_FMA4 "-mfma4") set(CLANG_FLAGS_ENABLE_AVX2 "-mavx2;-mfma") set(CLANG_FLAGS_ENABLE_AVX2128 "-mavx2;-mfma") set(CLANG_FLAGS_ENABLE_AVX512F "-mavx512f") set(CLANG_FLAGS_ENABLE_NEON32 "--target=arm-linux-gnueabihf;-mcpu=cortex-a8") # Arm AArch64 vector extensions. set(CLANG_FLAGS_ENABLE_ADVSIMD "-march=armv8-a+simd") set(CLANG_FLAGS_ENABLE_SVE "-march=armv8-a+sve") # PPC64 set(CLANG_FLAGS_ENABLE_VSX "-mvsx") # All variables storing compiler flags should be prefixed with FLAGS_ if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)") # Always compile sleef with -ffp-contract. set(FLAGS_STRICTMATH "-ffp-contract=off") set(FLAGS_FASTMATH "-ffast-math") # Without the options below, gcc generates calls to libm set(FLAGS_NO_ERRNO "-fno-math-errno -fno-trapping-math") # Intel vector extensions. foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS}) set(FLAGS_ENABLE_${SIMD} ${CLANG_FLAGS_ENABLE_${SIMD}}) endforeach() # Warning flags. set(FLAGS_WALL "-Wall -Wno-unused -Wno-attributes -Wno-unused-result") if(CMAKE_C_COMPILER_ID MATCHES "GNU") # The following compiler option is needed to suppress the warning # "AVX vector return without AVX enabled changes the ABI" at # src/arch/helpervecext.h:88 string(CONCAT FLAGS_WALL ${FLAGS_WALL} " -Wno-psabi") set(FLAGS_ENABLE_NEON32 "-mfpu=neon") endif(CMAKE_C_COMPILER_ID MATCHES "GNU") elseif(MSVC) # Intel vector extensions. 
set(FLAGS_ENABLE_SSE2 /D__SSE2__) set(FLAGS_ENABLE_SSE4 /D__SSE2__ /D__SSE3__ /D__SSE4_1__) set(FLAGS_ENABLE_AVX /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /arch:AVX) set(FLAGS_ENABLE_FMA4 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /D__FMA4__ /arch:AVX2) set(FLAGS_ENABLE_AVX2 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2) set(FLAGS_ENABLE_AVX2128 /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /arch:AVX2) set(FLAGS_ENABLE_AVX512F /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__AVX__ /D__AVX2__ /D__AVX512F__ /arch:AVX2) set(FLAGS_WALL "/D_CRT_SECURE_NO_WARNINGS") set(FLAGS_NO_ERRNO "") elseif(CMAKE_C_COMPILER_ID MATCHES "Intel") set(FLAGS_ENABLE_SSE2 "-msse2") set(FLAGS_ENABLE_SSE4 "-msse4.1") set(FLAGS_ENABLE_AVX "-mavx") set(FLAGS_ENABLE_AVX2 "-march=core-avx2") set(FLAGS_ENABLE_AVX2128 "-march=core-avx2") set(FLAGS_ENABLE_AVX512F "-xCOMMON-AVX512") set(FLAGS_STRICTMATH "-fp-model strict -Qoption,cpp,--extended_float_type -qoverride-limits") set(FLAGS_FASTMATH "-fp-model fast=2 -Qoption,cpp,--extended_float_type -qoverride-limits") set(FLAGS_WALL "-fmax-errors=3 -Wall -Wno-unused -Wno-attributes") set(FLAGS_NO_ERRNO "") endif() set(SLEEF_C_FLAGS "${FLAGS_WALL} ${FLAGS_STRICTMATH} ${FLAGS_NO_ERRNO}") if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER 6.99) set(DFT_C_FLAGS "${FLAGS_WALL}") else() set(DFT_C_FLAGS "${FLAGS_WALL} ${FLAGS_FASTMATH}") endif() if (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" AND CMAKE_C_COMPILER_ID MATCHES "GNU") set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -msse2 -mfpmath=sse") set(DFT_C_FLAGS "${DFT_C_FLAGS} -msse2 -mfpmath=sse -m128bit-long-double") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" AND CMAKE_C_COMPILER_ID MATCHES "Clang") set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -msse2 -mfpmath=sse") set(DFT_C_FLAGS "${DFT_C_FLAGS} -msse2 -mfpmath=sse") endif() if(CYGWIN OR MINGW) set(SLEEF_C_FLAGS "${SLEEF_C_FLAGS} -fno-asynchronous-unwind-tables") set(DFT_C_FLAGS "${DFT_C_FLAGS} 
-fno-asynchronous-unwind-tables") endif() # FEATURE DETECTION CHECK_TYPE_SIZE("long double" LD_SIZE) if(LD_SIZE GREATER "9") # This is needed to check since internal compiler error occurs with gcc 4.x CHECK_C_SOURCE_COMPILES(" typedef long double vlongdouble __attribute__((vector_size(sizeof(long double)*2))); vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d }; } int main() { vlongdouble vld = vcast_vl_l(0); }" COMPILER_SUPPORTS_LONG_DOUBLE) endif() CHECK_C_SOURCE_COMPILES(" int main() { __float128 r = 1; }" COMPILER_SUPPORTS_FLOAT128) # Detect if sleef supported architectures are also supported by the compiler set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SSE2}) CHECK_C_SOURCE_COMPILES(" #if defined(_MSC_VER) #include #else #include #endif int main() { __m128d r = _mm_mul_pd(_mm_set1_pd(1), _mm_set1_pd(2)); }" COMPILER_SUPPORTS_SSE2) set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SSE4}) CHECK_C_SOURCE_COMPILES(" #if defined(_MSC_VER) #include #else #include #endif int main() { __m128d r = _mm_floor_sd(_mm_set1_pd(1), _mm_set1_pd(2)); }" COMPILER_SUPPORTS_SSE4) set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_AVX}) CHECK_C_SOURCE_COMPILES(" #if defined(_MSC_VER) #include #else #include #endif int main() { __m256d r = _mm256_add_pd(_mm256_set1_pd(1), _mm256_set1_pd(2)); }" COMPILER_SUPPORTS_AVX) set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_FMA4}) CHECK_C_SOURCE_COMPILES(" #if defined(_MSC_VER) #include #else #include #endif int main() { __m256d r = _mm256_macc_pd(_mm256_set1_pd(1), _mm256_set1_pd(2), _mm256_set1_pd(3)); }" COMPILER_SUPPORTS_FMA4) set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_AVX2}) CHECK_C_SOURCE_COMPILES(" #if defined(_MSC_VER) #include #else #include #endif int main() { __m256i r = _mm256_abs_epi32(_mm256_set1_epi32(1)); }" COMPILER_SUPPORTS_AVX2) set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_SVE}) CHECK_C_SOURCE_COMPILES(" #include int main() { svint32_t r = svdup_n_s32(1); }" COMPILER_SUPPORTS_SVE) set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_AVX512F}) 
CHECK_C_SOURCE_COMPILES(" #if defined(_MSC_VER) #include #else #include #endif __m512 addConstant(__m512 arg) { return _mm512_add_ps(arg, _mm512_set1_ps(1.f)); } int main() { __m512i a = _mm512_set1_epi32(1); __m256i ymm = _mm512_extracti64x4_epi64(a, 0); __mmask16 m = _mm512_cmp_epi32_mask(a, a, _MM_CMPINT_EQ); __m512i r = _mm512_andnot_si512(a, a); }" COMPILER_SUPPORTS_AVX512F) # AVX2 implies AVX2128 if(COMPILER_SUPPORTS_AVX2) set(COMPILER_SUPPORTS_AVX2128 1) endif() set (CMAKE_REQUIRED_FLAGS ${FLAGS_ENABLE_VSX}) CHECK_C_SOURCE_COMPILES(" #include int main() { vector double d; d = vec_perm(d, d, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)); }" COMPILER_SUPPORTS_VSX) # Check if compilation with OpenMP really succeeds # It does not succeed on Travis even though find_package(OpenMP) succeeds. find_package(OpenMP) if(OPENMP_FOUND) set (CMAKE_REQUIRED_FLAGS "${OpenMP_C_FLAGS}") CHECK_C_SOURCE_COMPILES(" #include int main() { int i; #pragma omp parallel for for(i=0;i < 10;i++) { putchar(0); } }" COMPILER_SUPPORTS_OPENMP) endif(OPENMP_FOUND) # Check weak aliases are supported. 
CHECK_C_SOURCE_COMPILES(" #if defined(__CYGWIN__) #define EXPORT __stdcall __declspec(dllexport) #else #define EXPORT #endif EXPORT int f(int a) { return a + 2; } EXPORT int g(int a) __attribute__((weak, alias(\"f\"))); int main(void) { return g(2); }" COMPILER_SUPPORTS_WEAK_ALIASES) if (COMPILER_SUPPORTS_WEAK_ALIASES AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm" AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64" AND NOT MINGW AND BUILD_GNUABI_LIBS) set(ENABLE_GNUABI ${COMPILER_SUPPORTS_WEAK_ALIASES}) endif() CHECK_C_SOURCE_COMPILES(" int main(void) { double a = __builtin_sqrt (2); float b = __builtin_sqrtf(2); }" COMPILER_SUPPORTS_BUILTIN_MATH) # Reset used flags set(CMAKE_REQUIRED_FLAGS) set(CMAKE_REQUIRED_LIBRARIES) # Save the default C flags set(ORG_CMAKE_C_FLAGS CMAKE_C_FLAGS) # Check if sde64 command is available find_program(SDE_COMMAND sde64) if (NOT SDE_COMMAND) find_program(SDE_COMMAND sde) endif() # Check if armie command is available find_program(ARMIE_COMMAND armie) if (NOT SVE_VECTOR_BITS) set(SVE_VECTOR_BITS 128) endif() ## if(SLEEF_SHOW_ERROR_LOG) if (EXISTS ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeError.log) file(READ ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeError.log FILE_CONTENT) message("${FILE_CONTENT}") endif() endif(SLEEF_SHOW_ERROR_LOG) # Detect if cmake is running on Travis string(COMPARE NOTEQUAL "" "$ENV{TRAVIS}" RUNNING_ON_TRAVIS) if (${RUNNING_ON_TRAVIS} AND CMAKE_C_COMPILER_ID MATCHES "Clang") message("Travix bug workaround turned on") set(COMPILER_SUPPORTS_OPENMP FALSE) # Workaround for https://github.com/travis-ci/travis-ci/issues/8613 set(COMPILER_SUPPORTS_FLOAT128 FALSE) # Compilation on unroll_0_vecextqp.c does not finish on Travis endif() if (MSVC) set(COMPILER_SUPPORTS_OPENMP FALSE) # At this time, OpenMP is not supported on MSVC endif() # Set common definitions if (NOT BUILD_SHARED_LIBS) set(COMMON_TARGET_DEFINITIONS SLEEF_STATIC_LIBS=1) endif() if (COMPILER_SUPPORTS_WEAK_ALIASES) set(COMMON_TARGET_DEFINITIONS 
${COMMON_TARGET_DEFINITIONS} ENABLE_ALIAS=1) endif() # When cross compiling for ppc64, this bug-workaround is needed if(CMAKE_CROSSCOMPILING AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") set(COMMON_TARGET_DEFINITIONS ${COMMON_TARGET_DEFINITIONS} POWER64_UNDEF_USE_EXTERN_INLINES=1) endif() sleef-3.3.1/Jenkinsfile000066400000000000000000000143071333715643700150210ustar00rootroot00000000000000pipeline { agent any stages { stage('Preamble') { parallel { stage('AArch64 SVE') { agent { label 'aarch64' } steps { sh ''' echo "AArch64 SVE on" `hostname` export PATH=$PATH:/opt/arm/arm-instruction-emulator-1.2.1_Generic-AArch64_Ubuntu-14.04_aarch64-linux/bin export LD_LIBRARY_PATH=/opt/arm/arm-instruction-emulator-1.2.1_Generic-AArch64_Ubuntu-14.04_aarch64-linux/lib:/opt/arm/arm-hpc-compiler-18.1_Generic-AArch64_Ubuntu-16.04_aarch64-linux/lib export CC=/opt/arm/arm-hpc-compiler-18.1_Generic-AArch64_Ubuntu-16.04_aarch64-linux/bin/armclang rm -rf build mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. make -j 4 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 make install ''' } } stage('Intel Compiler') { agent { label 'icc' } steps { sh ''' echo "Intel Compiler on" `hostname` export PATH=$PATH:/export/opt/sde-external-8.16.0-2018-01-30-lin:/export/opt/compilers_and_libraries_2018/linux/bin/intel64 export LD_LIBRARY_PATH=/export/opt/compilers_and_libraries_2018.1.163/linux/compiler/lib/intel64_lin/ export CC=icc rm -rf build mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. 
make -j 4 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 make install ''' } } stage('FMA4') { agent { label 'fma4' } steps { sh ''' echo "FMA4 on" `hostname` export PATH=$PATH:/opt/local/bin:/opt/bin:/opt/sde-external-8.16.0-2018-01-30-lin export LD_LIBRARY_PATH=/opt/local/lib:/opt/lib rm -rf build mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. make -j 4 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 make install ''' } } stage('GCC-4.8') { agent { label 'x86' } steps { sh ''' echo "gcc-4 on" `hostname` export PATH=$PATH:/opt/sde-external-8.16.0-2018-01-30-lin export CC=gcc-4.8 rm -rf build mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. make -j 4 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 make install ''' } } stage('Static libs on mac') { agent { label 'mac' } steps { sh ''' echo "On" `hostname` export PATH=$PATH:/opt/local/bin:/opt/local/bin:/usr/local/bin:/usr/bin:/bin rm -rf build mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE .. make -j 2 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 make install ''' } } stage('Windows') { agent { label 'win' } steps { bat ''' set "PROJECT_DIR=%cd%" set "ORG_PATH=%PATH%" PATH C:/Cygwin64/bin;C:/Cygwin64/usr/bin;%PROJECT_DIR%/build-cygwin/bin;%PATH% rmdir /S /Q build-cygwin C:/Cygwin64/bin/bash -c 'mkdir build-cygwin;cd build-cygwin;cmake -g"Unix Makefiles" ..;make -j 4' del /Q /F %PROJECT_DIR%/build-cygwin/bin/iut* PATH %ORG_PATH%;C:/Cygwin64/bin;C:/Cygwin64/usr/bin;%PROJECT_DIR%/build-cygwin/bin;%PROJECT_DIR%/build/bin cd %PROJECT_DIR% rmdir /S /Q build mkdir build cd build cmake -G"Visual Studio 15 2017 Win64" .. -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DBUILD_SHARED_LIBS=FALSE cmake --build . 
--target install --config Release ctest --output-on-failure -j 4 -C Release ''' } } stage('i386') { agent { label 'i386' } steps { sh ''' echo "i386 on" `hostname` rm -rf build mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. make -j 4 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 make install ''' } } stage('FreeBSD') { agent { label 'freebsd' } steps { sh ''' echo "FreeBSD on" `hostname` rm -rf build mkdir build cd build cmake -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. make -j 2 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 make install ''' } } stage('PowerPC VSX') { agent { label 'x86 && xenial' } steps { sh ''' echo "PowerPC VSX on" `hostname` rm -rf build-native mkdir build-native cd build-native cmake -DSLEEF_SHOW_CONFIG=1 .. make -j 4 all cd .. export PATH=$PATH:`pwd`/travis export QEMU_CPU=POWER8 chmod +x travis/ppc64el-cc rm -rf build mkdir build cd build cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-ppc64el.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-ppc64le-static -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. 
make -j 4 all export OMP_WAIT_POLICY=passive export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 4 make install ''' } } } } } } sleef-3.3.1/LICENSE.txt000066400000000000000000000024721333715643700144600ustar00rootroot00000000000000Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. sleef-3.3.1/README.md000066400000000000000000000011351333715643700141070ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/shibatch/sleef.svg?branch=master)](https://travis-ci.org/shibatch/sleef) In this library, functions for evaluating some elementary functions are implemented. The library also includes DFT subroutines. The software is distributed under the Boost Software License, Version 1.0. 
See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt. Contributions to this project are accepted under the same license. Copyright Naoki Shibata and contributors 2010 - 2017. Main Page : http://sleef.org/ GitHub Repo : https://github.com/shibatch/sleef sleef-3.3.1/appveyor.yml000066400000000000000000000030211333715643700152140ustar00rootroot00000000000000version: 1.0.{build} max_jobs: 1 image: Visual Studio 2017 configuration: Release environment: matrix: - ENV_BUILD_STATIC: -DBUILD_SHARED_LIBS=TRUE DO_TEST: TRUE - ENV_BUILD_STATIC: -DBUILD_SHARED_LIBS=FALSE DO_TEST: FALSE install: - if "%DO_TEST%" == "TRUE" set ORGPATH="%PATH%" - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\setup-x86_64.exe" -q -g -P libmpfr-devel,libgmp-devel,cmake - if "%DO_TEST%" == "TRUE" PATH c:\Cygwin64\bin;c:\Cygwin64\usr\bin;c:\projects\sleef\build-cygwin\bin;"%PATH%" - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-mingw;cd build-mingw;CC=x86_64-w64-mingw32-gcc cmake -g\"Unix Makefiles\" .. -DBUILD_SHARED_LIBS=FALSE;make -j 2' - if "%DO_TEST%" == "TRUE" cd "c:\\projects\\sleef" - if "%DO_TEST%" == "TRUE" "C:\\Cygwin64\\bin\\bash" -c 'mkdir build-cygwin;cd build-cygwin;cmake -g\"Unix Makefiles\" ..;make -j 2' - if "%DO_TEST%" == "TRUE" del /Q /F c:\projects\sleef\build-cygwin\bin\iut* - if "%DO_TEST%" == "TRUE" PATH "%ORGPATH%";c:\Cygwin64\bin;c:\Cygwin64\usr\bin;c:\projects\sleef\build-cygwin\bin;c:\projects\sleef\build\bin - if "%DO_TEST%" == "TRUE" cd "c:\\projects\\sleef" - mkdir build - cd build - cmake -G"Visual Studio 15 2017 Win64" .. -DCMAKE_INSTALL_PREFIX=install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_SHOW_ERROR_LOG=1 %ENV_BUILD_STATIC% build_script: - cmake --build . 
--target install --config Release test_script: - if "%DO_TEST%" == "TRUE" (ctest --output-on-failure -j 2 -C Release) artifacts: - path: build\install\**\* name: SLEEFWindowsx64 sleef-3.3.1/doc/000077500000000000000000000000001333715643700133755ustar00rootroot00000000000000sleef-3.3.1/doc/build-with-cmake.md000066400000000000000000000074541333715643700170570ustar00rootroot00000000000000# Introduction [Cmake](http://www.cmake.org/) is an open-source and cross-platform building tool for software packages that provides easy managing of multiple build systems at a time. It works by allowing the developer to specify build parameters and rules in a simple text file that cmake then processes to generate project files for the actual native build tools (e.g. UNIX Makefiles, Microsoft Visual Studio, Apple XCode, etc). That means you can easily maintain multiple separate builds for one project and manage cross-platform hardware and software complexity. If you are not already familiar with cmake, please refer to the [official documentation](https://cmake.org/documentation/) or the [Basic Introductions](https://cmake.org/Wiki/CMake#Basic_Introductions) in the wiki (recommended). Before using CMake you will need to install/build the binaries on your system. Most systems have cmake already installed or provided by the standard package manager. If that is not the case for you, please [download](https://cmake.org/download/) and install now. For building SLEEF, version 3.4.3 is the minimum required. # Quick start 1. Make sure cmake is available on the command-line. ``` $ cmake --version (should display a version number greater than or equal to 3.4.3) ``` 2. Download the tar from the [software repository](http://shibatch.sourceforge.net/) or checkout out the source code from the [github repository](https://github.com/shibatch/sleef): ``` $ git clone https://github.com/shibatch/sleef ``` 3. Make a separate directory to create an out-of-source build. 
SLEEF does not allow for in-tree builds. ``` $ cd sleef-project $ mkdir my-sleef-build && cd my-sleef-build ``` 4. Run cmake to configure your project and generate the system to build it: ``` $ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_INSTALL_PREFIX=../my-sleef-install \ .. ``` These flags configure an optimised `libsleef` shared library build with basic debug info. By default, cmake will autodetect your system platform and configure the build using the default parameters. You can control and modify these parameters by setting variables when running cmake. See the list of [options and variables](#build-customization) for customizing your build. > NOTE: On **Windows**, you need to use a specific generator like this: > `cmake -G"Visual Studio 14 2015 Win64" ..` specifying the Visual Studio version > and targeting specifically `Win64` (to support compilation of AVX/AVX2) > Check `cmake -G` to get a full list of supported Visual Studio project generators. > This generator will create a proper solution `SLEEF.sln` under the build > directory. > You can still use `cmake --build .` to build without opening Visual Studio. 5. Now that you have the build files created by cmake, proceed from the top of the build directory: ``` $ make sleef ``` 6. Install the library under ../my-sleef-install by running: ``` $ make install ``` 7. You can execute the tests by running: ``` $ make test ``` # Build customization Variables dictate how the build is generated; options are defined and undefined, respectively, on the cmake command line like this: ``` cmake -DVARIABLE=<value> cmake -UVARIABLE ``` Build configurations allow a project to be built in different ways for debug, optimized, or any other special set of flags.
## CMake Variables - `CMAKE_BUILD_TYPE`: By default, CMake supports the following configurations: * `Debug`: basic debug flags turned on * `Release`: basic optimizations turned on * `MinSizeRel`: builds the smallest (but not fastest) object code * `RelWithDebInfo`: builds optimized code with debug information as well - `CMAKE_INSTALL_PREFIX`: The prefix to use when running `make install`. Defaults to /usr/local on GNU/Linux and MacOS. Defaults to C:/Program Files on Windows. ## SLEEF Variables sleef-3.3.1/doc/html/000077500000000000000000000000001333715643700143415ustar00rootroot00000000000000sleef-3.3.1/doc/html/CMakeLists.txt000066400000000000000000000013051333715643700171000ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.4.3) include(ExternalProject) find_package(Git REQUIRED) ExternalProject_Add(libsleef GIT_REPOSITORY https://github.com/shibatch/sleef CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/contrib ) include_directories(${CMAKE_BINARY_DIR}/contrib/include) link_directories(${CMAKE_BINARY_DIR}/contrib/lib) add_executable(hellox86 hellox86.c) add_dependencies(hellox86 libsleef) target_link_libraries(hellox86 sleef) # option(BUILD_DFT_TUTORIAL "Build DFT tutorial" OFF) if (BUILD_DFT_TUTORIAL) add_executable(dfttutorial tutorial.c) add_dependencies(dfttutorial libsleef) find_library(LIBM m) target_link_libraries(dfttutorial sleef sleefdft ${LIBM}) endif() sleef-3.3.1/doc/html/CNAME000066400000000000000000000000111333715643700150770ustar00rootroot00000000000000sleef.orgsleef-3.3.1/doc/html/aarch32.xhtml000066400000000000000000001550651333715643700166560ustar00rootroot00000000000000 SLEEF Documentation

SLEEF Documentation - Math library reference

Table of contents

Data types for AArch32 architecture

Sleef_float32x4_t_2

Description

Sleef_float32x4_t_2 is a data type for storing two float32x4_t values, which is defined in sleef.h as follows:

typedef struct {
  float32x4_t x, y;
} Sleef_float32x4_t_2;

Trigonometric Functions

Vectorized single precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinf4_u10(float32x4_t a);
float32x4_t Sleef_sinf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinf4_u35(float32x4_t a);
float32x4_t Sleef_sinf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cosf4_u10(float32x4_t a);
float32x4_t Sleef_cosf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cosf4_u35(float32x4_t a);
float32x4_t Sleef_cosf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincosf4_u10(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincosf4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinpif4_u05(float32x4_t a);
float32x4_t Sleef_sinpif4_u05neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cospif4_u05(float32x4_t a);
float32x4_t Sleef_cospif4_u05neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision combined sine and cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincospif4_u05(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u05neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincospif4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanf4_u10(float32x4_t a);
float32x4_t Sleef_tanf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanf4_u35(float32x4_t a);
float32x4_t Sleef_tanf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Power, exponential, and logarithmic function

Vectorized single precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_powf4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_powf4_u10neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_powf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_logf4_u10(float32x4_t a);
float32x4_t Sleef_logf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_logf4_u35(float32x4_t a);
float32x4_t Sleef_logf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log10f4_u10(float32x4_t a);
float32x4_t Sleef_log10f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log2f4_u10(float32x4_t a);
float32x4_t Sleef_log2f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log1pf4_u10(float32x4_t a);
float32x4_t Sleef_log1pf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1pf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_expf4_u10(float32x4_t a);
float32x4_t Sleef_expf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_exp2f4_u10(float32x4_t a);
float32x4_t Sleef_exp2f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-10 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_exp10f4_u10(float32x4_t a);
float32x4_t Sleef_exp10f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_expm1f4_u10(float32x4_t a);
float32x4_t Sleef_expm1f4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sqrtf4(float32x4_t a);
float32x4_t Sleef_sqrtf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sqrtf4_u35(float32x4_t a);
float32x4_t Sleef_sqrtf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cbrtf4_u10(float32x4_t a);
float32x4_t Sleef_cbrtf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cbrtf4_u35(float32x4_t a);
float32x4_t Sleef_cbrtf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_hypotf4_u05(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u05neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u05. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_hypotf4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u35neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Inverse Trigonometric Functions

Vectorized single precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinf4_u10(float32x4_t a);
float32x4_t Sleef_asinf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinf4_u35(float32x4_t a);
float32x4_t Sleef_asinf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_acosf4_u10(float32x4_t a);
float32x4_t Sleef_acosf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_acosf4_u35(float32x4_t a);
float32x4_t Sleef_acosf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanf4_u10(float32x4_t a);
float32x4_t Sleef_atanf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanf4_u35(float32x4_t a);
float32x4_t Sleef_atanf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atan2f4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u10neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atan2f4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u35neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Hyperbolic function and inverse hyperbolic function

Vectorized single precision hyperbolic sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinhf4_u10(float32x4_t a);
float32x4_t Sleef_sinhf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinhf4_u35(float32x4_t a);
float32x4_t Sleef_sinhf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_coshf4_u10(float32x4_t a);
float32x4_t Sleef_coshf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_coshf4_u35(float32x4_t a);
float32x4_t Sleef_coshf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanhf4_u10(float32x4_t a);
float32x4_t Sleef_tanhf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanhf4_u35(float32x4_t a);
float32x4_t Sleef_tanhf4_u35neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u35. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinhf4_u10(float32x4_t a);
float32x4_t Sleef_asinhf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_acoshf4_u10(float32x4_t a);
float32x4_t Sleef_acoshf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acoshf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanhf4_u10(float32x4_t a);
float32x4_t Sleef_atanhf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanhf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Error and gamma function

Vectorized single precision error function

Synopsis

#include <sleef.h>

float32x4_t Sleef_erff4_u10(float32x4_t a);
float32x4_t Sleef_erff4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erff_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision complementary error function

Synopsis

#include <sleef.h>

float32x4_t Sleef_erfcf4_u15(float32x4_t a);
float32x4_t Sleef_erfcf4_u15neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfcf_u15. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision gamma function

Synopsis

#include <sleef.h>

float32x4_t Sleef_tgammaf4_u10(float32x4_t a);
float32x4_t Sleef_tgammaf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgammaf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision log gamma function

Synopsis

#include <sleef.h>

float32x4_t Sleef_lgammaf4_u10(float32x4_t a);
float32x4_t Sleef_lgammaf4_u10neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgammaf_u10. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Nearest integer function

Vectorized single precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

float32x4_t Sleef_truncf4(float32x4_t a);
float32x4_t Sleef_truncf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_truncf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

float32x4_t Sleef_floorf4(float32x4_t a);
float32x4_t Sleef_floorf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floorf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

float32x4_t Sleef_ceilf4(float32x4_t a);
float32x4_t Sleef_ceilf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceilf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float32x4_t Sleef_roundf4(float32x4_t a);
float32x4_t Sleef_roundf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_roundf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float32x4_t Sleef_rintf4(float32x4_t a);
float32x4_t Sleef_rintf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rintf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

Other function

Vectorized single precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmaf4(float32x4_t a, float32x4_t b, float32x4_t c);
float32x4_t Sleef_fmaf4_neon(float32x4_t a, float32x4_t b, float32x4_t c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmodf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmodf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmodf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

float32x4_t Sleef_frfrexpf4(float32x4_t a);
float32x4_t Sleef_frfrexpf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexpf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_modff4(float32x4_t a);
Sleef_float32x4_t_2 Sleef_modff4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modff. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for calculating the absolute value

Synopsis

#include <sleef.h>

float32x4_t Sleef_fabsf4(float32x4_t a);
float32x4_t Sleef_fabsf4_neon(float32x4_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabsf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for copying signs

Synopsis

#include <sleef.h>

float32x4_t Sleef_copysignf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_copysignf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysignf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for determining maximum of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmaxf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmaxf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaxf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for determining minimum of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fminf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fminf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fminf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fdimf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fdimf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdimf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.


Vectorized single precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

float32x4_t Sleef_nextafterf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_nextafterf4_neon(float32x4_t a, float32x4_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafterf. This function may be less accurate than the scalar function since AArch32 NEON is not IEEE 754-compliant.

sleef-3.3.1/doc/html/aarch64.xhtml000066400000000000000000003561361333715643700166650ustar00rootroot00000000000000 SLEEF Documentation

SLEEF Documentation - Math library reference

Table of contents

Data types for AArch64 architecture

Sleef_float32x4_t_2

Description

Sleef_float32x4_t_2 is a data type for storing two float32x4_t values, which is defined in sleef.h as follows:

typedef struct {
  float32x4_t x, y;
} Sleef_float32x4_t_2;

Sleef_float64x2_t_2

Description

Sleef_float64x2_t_2 is a data type for storing two float64x2_t values, which is defined in sleef.h as follows:

typedef struct {
  float64x2_t x, y;
} Sleef_float64x2_t_2;

Sleef_svfloat32_t_2

Description

Sleef_svfloat32_t_2 is a data type for storing two svfloat32_t values, which is defined in sleef.h as follows:

typedef struct {
  svfloat32_t x, y;
} Sleef_svfloat32_t_2;

Sleef_svfloat64_t_2

Description

Sleef_svfloat64_t_2 is a data type for storing two svfloat64_t values, which is defined in sleef.h as follows:

typedef struct {
  svfloat64_t x, y;
} Sleef_svfloat64_t_2;

Trigonometric Functions

Vectorized double precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_sind2_u10(float64x2_t a);
float64x2_t Sleef_sind2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_sindx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u10 with the same accuracy specification.


Vectorized single precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinf4_u10(float32x4_t a);
float32x4_t Sleef_sinf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_sinfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u10 with the same accuracy specification.


Vectorized double precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_sind2_u35(float64x2_t a);
float64x2_t Sleef_sind2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_sindx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u35 with the same accuracy specification.


Vectorized single precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinf4_u35(float32x4_t a);
float32x4_t Sleef_sinf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_sinfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u35 with the same accuracy specification.


Vectorized double precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_cosd2_u10(float64x2_t a);
float64x2_t Sleef_cosd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_cosdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u10 with the same accuracy specification.


Vectorized single precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cosf4_u10(float32x4_t a);
float32x4_t Sleef_cosf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_cosfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u10 with the same accuracy specification.


Vectorized double precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_cosd2_u35(float64x2_t a);
float64x2_t Sleef_cosd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_cosdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u35 with the same accuracy specification.


Vectorized single precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cosf4_u35(float32x4_t a);
float32x4_t Sleef_cosf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_cosfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u35 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float64x2_t_2 Sleef_sincosd2_u10(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincosd2_u10advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_sincosdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u10 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincosf4_u10(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u10advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_sincosfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u10 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float64x2_t_2 Sleef_sincosd2_u35(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincosd2_u35advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_sincosdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincosf4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincosf4_u35advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_sincosfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u35 with the same accuracy specification.


Vectorized double precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_sinpid2_u05(float64x2_t a);
float64x2_t Sleef_sinpid2_u05advsimd(float64x2_t a);
svfloat64_t Sleef_sinpidx_u05sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpi_u05 with the same accuracy specification.


Vectorized single precision sine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinpif4_u05(float32x4_t a);
float32x4_t Sleef_sinpif4_u05advsimd(float32x4_t a);
svfloat32_t Sleef_sinpifx_u05sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpif_u05 with the same accuracy specification.


Vectorized double precision cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_cospid2_u05(float64x2_t a);
float64x2_t Sleef_cospid2_u05advsimd(float64x2_t a);
svfloat64_t Sleef_cospidx_u05sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospi_u05 with the same accuracy specification.


Vectorized single precision cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cospif4_u05(float32x4_t a);
float32x4_t Sleef_cospif4_u05advsimd(float32x4_t a);
svfloat32_t Sleef_cospifx_u05sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float64x2_t_2 Sleef_sincospid2_u05(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincospid2_u05advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_sincospidx_u05sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u05 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincospif4_u05(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u05advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_sincospifx_u05sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float64x2_t_2 Sleef_sincospid2_u35(float64x2_t a);
Sleef_float64x2_t_2 Sleef_sincospid2_u35advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_sincospidx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_sincospif4_u35(float32x4_t a);
Sleef_float32x4_t_2 Sleef_sincospif4_u35advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_sincospifx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u35 with the same accuracy specification.


Vectorized double precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_tand2_u10(float64x2_t a);
float64x2_t Sleef_tand2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_tandx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u10 with the same accuracy specification.


Vectorized single precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanf4_u10(float32x4_t a);
float32x4_t Sleef_tanf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_tanfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u10 with the same accuracy specification.


Vectorized double precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_tand2_u35(float64x2_t a);
float64x2_t Sleef_tand2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_tandx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u35 with the same accuracy specification.


Vectorized single precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanf4_u35(float32x4_t a);
float32x4_t Sleef_tanf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_tanfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u35 with the same accuracy specification.

Power, exponential, and logarithmic function

Vectorized double precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_powd2_u10(float64x2_t a, float64x2_t b);
float64x2_t Sleef_powd2_u10advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_powdx_u10sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_pow_u10 with the same accuracy specification.


Vectorized single precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_powf4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_powf4_u10advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_powfx_u10sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_powf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_logd2_u10(float64x2_t a);
float64x2_t Sleef_logd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_logdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u10 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_logf4_u10(float32x4_t a);
float32x4_t Sleef_logf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_logfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_logd2_u35(float64x2_t a);
float64x2_t Sleef_logd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_logdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u35 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_logf4_u35(float32x4_t a);
float32x4_t Sleef_logf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_logfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u35 with the same accuracy specification.


Vectorized double precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_log10d2_u10(float64x2_t a);
float64x2_t Sleef_log10d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_log10dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10_u10 with the same accuracy specification.


Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log10f4_u10(float32x4_t a);
float32x4_t Sleef_log10f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_log10fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10f_u10 with the same accuracy specification.


Vectorized double precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_log2d2_u10(float64x2_t a);
float64x2_t Sleef_log2d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_log2dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2_u10 with the same accuracy specification.


Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log2f4_u10(float32x4_t a);
float32x4_t Sleef_log2f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_log2fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2f_u10 with the same accuracy specification.


Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_log1pd2_u10(float64x2_t a);
float64x2_t Sleef_log1pd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_log1pdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1p_u10 with the same accuracy specification.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_log1pf4_u10(float32x4_t a);
float32x4_t Sleef_log1pf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_log1pfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1pf_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_expd2_u10(float64x2_t a);
float64x2_t Sleef_expd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_expdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_expf4_u10(float32x4_t a);
float32x4_t Sleef_expf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_expfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expf_u10 with the same accuracy specification.


Vectorized double precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_exp2d2_u10(float64x2_t a);
float64x2_t Sleef_exp2d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_exp2dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2_u10 with the same accuracy specification.


Vectorized single precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_exp2f4_u10(float32x4_t a);
float32x4_t Sleef_exp2f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_exp2fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2f_u10 with the same accuracy specification.


Vectorized double precision base-10 exponential function with 1.09 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_exp10d2_u10(float64x2_t a);
float64x2_t Sleef_exp10d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_exp10dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10_u10 with the same accuracy specification.


Vectorized single precision base-10 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_exp10f4_u10(float32x4_t a);
float32x4_t Sleef_exp10f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_exp10fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10f_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_expm1d2_u10(float64x2_t a);
float64x2_t Sleef_expm1d2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_expm1dx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_expm1f4_u10(float32x4_t a);
float32x4_t Sleef_expm1f4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_expm1fx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1f_u10 with the same accuracy specification.


Vectorized double precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_sqrtd2(float64x2_t a);
float64x2_t Sleef_sqrtd2_advsimd(float64x2_t a);
svfloat64_t Sleef_sqrtdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u05 with the same accuracy specification.


Vectorized single precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sqrtf4(float32x4_t a);
float32x4_t Sleef_sqrtf4_advsimd(float32x4_t a);
svfloat32_t Sleef_sqrtfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u05 with the same accuracy specification.


Vectorized double precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_sqrtd2_u35(float64x2_t a);
float64x2_t Sleef_sqrtd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_sqrtdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u35 with the same accuracy specification.


Vectorized single precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sqrtf4_u35(float32x4_t a);
float32x4_t Sleef_sqrtf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_sqrtfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u35 with the same accuracy specification.


Vectorized double precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_cbrtd2_u10(float64x2_t a);
float64x2_t Sleef_cbrtd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_cbrtdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u10 with the same accuracy specification.


Vectorized single precision cubic root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cbrtf4_u10(float32x4_t a);
float32x4_t Sleef_cbrtf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_cbrtfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u10 with the same accuracy specification.


Vectorized double precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_cbrtd2_u35(float64x2_t a);
float64x2_t Sleef_cbrtd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_cbrtdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u35 with the same accuracy specification.


Vectorized single precision cubic root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_cbrtf4_u35(float32x4_t a);
float32x4_t Sleef_cbrtf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_cbrtfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u35 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_hypotd2_u05(float64x2_t a, float64x2_t b);
float64x2_t Sleef_hypotd2_u05advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_hypotdx_u05sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u05 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_hypotf4_u05(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u05advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_hypotfx_u05sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u05 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_hypotd2_u35(float64x2_t a, float64x2_t b);
float64x2_t Sleef_hypotd2_u35advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_hypotdx_u35sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u35 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_hypotf4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_hypotf4_u35advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_hypotfx_u35sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u35 with the same accuracy specification.

Inverse Trigonometric Functions

Vectorized double precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_asind2_u10(float64x2_t a);
float64x2_t Sleef_asind2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_asindx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u10 with the same accuracy specification.


Vectorized single precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinf4_u10(float32x4_t a);
float32x4_t Sleef_asinf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_asinfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u10 with the same accuracy specification.


Vectorized double precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_asind2_u35(float64x2_t a);
float64x2_t Sleef_asind2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_asindx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u35 with the same accuracy specification.


Vectorized single precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinf4_u35(float32x4_t a);
float32x4_t Sleef_asinf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_asinfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u35 with the same accuracy specification.


Vectorized double precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_acosd2_u10(float64x2_t a);
float64x2_t Sleef_acosd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_acosdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u10 with the same accuracy specification.


Vectorized single precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_acosf4_u10(float32x4_t a);
float32x4_t Sleef_acosf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_acosfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u10 with the same accuracy specification.


Vectorized double precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_acosd2_u35(float64x2_t a);
float64x2_t Sleef_acosd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_acosdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u35 with the same accuracy specification.


Vectorized single precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_acosf4_u35(float32x4_t a);
float32x4_t Sleef_acosf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_acosfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_atand2_u10(float64x2_t a);
float64x2_t Sleef_atand2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_atandx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u10 with the same accuracy specification.


Vectorized single precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanf4_u10(float32x4_t a);
float32x4_t Sleef_atanf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_atanfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u10 with the same accuracy specification.


Vectorized double precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_atand2_u35(float64x2_t a);
float64x2_t Sleef_atand2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_atandx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u35 with the same accuracy specification.


Vectorized single precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanf4_u35(float32x4_t a);
float32x4_t Sleef_atanf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_atanfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_atan2d2_u10(float64x2_t a, float64x2_t b);
float64x2_t Sleef_atan2d2_u10advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_atan2dx_u10sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u10 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atan2f4_u10(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u10advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_atan2fx_u10sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u10 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_atan2d2_u35(float64x2_t a, float64x2_t b);
float64x2_t Sleef_atan2d2_u35advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_atan2dx_u35sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u35 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_atan2f4_u35(float32x4_t a, float32x4_t b);
float32x4_t Sleef_atan2f4_u35advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_atan2fx_u35sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u35 with the same accuracy specification.

Hyperbolic function and inverse hyperbolic function

Vectorized double precision hyperbolic sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_sinhd2_u10(float64x2_t a);
float64x2_t Sleef_sinhd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_sinhdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic sine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinhf4_u10(float32x4_t a);
float32x4_t Sleef_sinhf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_sinhfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_sinhd2_u35(float64x2_t a);
float64x2_t Sleef_sinhd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_sinhdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_sinhf4_u35(float32x4_t a);
float32x4_t Sleef_sinhf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_sinhfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function

Synopsis

#include <sleef.h>

float64x2_t Sleef_coshd2_u10(float64x2_t a);
float64x2_t Sleef_coshd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_coshdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_coshf4_u10(float32x4_t a);
float32x4_t Sleef_coshf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_coshfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_coshd2_u35(float64x2_t a);
float64x2_t Sleef_coshd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_coshdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_coshf4_u35(float32x4_t a);
float32x4_t Sleef_coshf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_coshfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function

Synopsis

#include <sleef.h>

float64x2_t Sleef_tanhd2_u10(float64x2_t a);
float64x2_t Sleef_tanhd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_tanhdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanhf4_u10(float32x4_t a);
float32x4_t Sleef_tanhf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_tanhfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float64x2_t Sleef_tanhd2_u35(float64x2_t a);
float64x2_t Sleef_tanhd2_u35advsimd(float64x2_t a);
svfloat64_t Sleef_tanhdx_u35sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float32x4_t Sleef_tanhf4_u35(float32x4_t a);
float32x4_t Sleef_tanhf4_u35advsimd(float32x4_t a);
svfloat32_t Sleef_tanhfx_u35sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u35 with the same accuracy specification.


Vectorized double precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

float64x2_t Sleef_asinhd2_u10(float64x2_t a);
float64x2_t Sleef_asinhd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_asinhdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_asinhf4_u10(float32x4_t a);
float32x4_t Sleef_asinhf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_asinhfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinhf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

float64x2_t Sleef_acoshd2_u10(float64x2_t a);
float64x2_t Sleef_acoshd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_acoshdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

float32x4_t Sleef_acoshf4_u10(float32x4_t a);
float32x4_t Sleef_acoshf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_acoshfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acoshf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

float64x2_t Sleef_atanhd2_u10(float64x2_t a);
float64x2_t Sleef_atanhd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_atanhdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

float32x4_t Sleef_atanhf4_u10(float32x4_t a);
float32x4_t Sleef_atanhf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_atanhfx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanhf_u10 with the same accuracy specification.

Error and gamma function

Vectorized double precision error function

Synopsis

#include <sleef.h>

float64x2_t Sleef_erfd2_u10(float64x2_t a);
float64x2_t Sleef_erfd2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_erfdx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erf_u10 with the same accuracy specification.


Vectorized single precision error function

Synopsis

#include <sleef.h>

float32x4_t Sleef_erff4_u10(float32x4_t a);
float32x4_t Sleef_erff4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_erffx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erff_u10 with the same accuracy specification.


Vectorized double precision complementary error function

Synopsis

#include <sleef.h>

float64x2_t Sleef_erfcd2_u15(float64x2_t a);
float64x2_t Sleef_erfcd2_u15advsimd(float64x2_t a);
svfloat64_t Sleef_erfcdx_u15sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfc_u15 with the same accuracy specification.


Vectorized single precision complementary error function

Synopsis

#include <sleef.h>

float32x4_t Sleef_erfcf4_u15(float32x4_t a);
float32x4_t Sleef_erfcf4_u15advsimd(float32x4_t a);
svfloat32_t Sleef_erfcfx_u15sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfcf_u15 with the same accuracy specification.


Vectorized double precision gamma function

Synopsis

#include <sleef.h>

float64x2_t Sleef_tgammad2_u10(float64x2_t a);
float64x2_t Sleef_tgammad2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_tgammadx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgamma_u10 with the same accuracy specification.


Vectorized single precision gamma function

Synopsis

#include <sleef.h>

float32x4_t Sleef_tgammaf4_u10(float32x4_t a);
float32x4_t Sleef_tgammaf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_tgammafx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgammaf_u10 with the same accuracy specification.


Vectorized double precision log gamma function

Synopsis

#include <sleef.h>

float64x2_t Sleef_lgammad2_u10(float64x2_t a);
float64x2_t Sleef_lgammad2_u10advsimd(float64x2_t a);
svfloat64_t Sleef_lgammadx_u10sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgamma_u10 with the same accuracy specification.


Vectorized single precision log gamma function

Synopsis

#include <sleef.h>

float32x4_t Sleef_lgammaf4_u10(float32x4_t a);
float32x4_t Sleef_lgammaf4_u10advsimd(float32x4_t a);
svfloat32_t Sleef_lgammafx_u10sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgammaf_u10 with the same accuracy specification.

Nearest integer function

Vectorized double precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

float64x2_t Sleef_truncd2(float64x2_t a);
float64x2_t Sleef_truncd2_advsimd(float64x2_t a);
svfloat64_t Sleef_truncdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_trunc with the same accuracy specification.


Vectorized single precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

float32x4_t Sleef_truncf4(float32x4_t a);
float32x4_t Sleef_truncf4_advsimd(float32x4_t a);
svfloat32_t Sleef_truncfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_truncf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

float64x2_t Sleef_floord2(float64x2_t a);
float64x2_t Sleef_floord2_advsimd(float64x2_t a);
svfloat64_t Sleef_floordx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floor with the same accuracy specification.


Vectorized single precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

float32x4_t Sleef_floorf4(float32x4_t a);
float32x4_t Sleef_floorf4_advsimd(float32x4_t a);
svfloat32_t Sleef_floorfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floorf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

float64x2_t Sleef_ceild2(float64x2_t a);
float64x2_t Sleef_ceild2_advsimd(float64x2_t a);
svfloat64_t Sleef_ceildx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceil with the same accuracy specification.


Vectorized single precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

float32x4_t Sleef_ceilf4(float32x4_t a);
float32x4_t Sleef_ceilf4_advsimd(float32x4_t a);
svfloat32_t Sleef_ceilfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceilf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float64x2_t Sleef_roundd2(float64x2_t a);
float64x2_t Sleef_roundd2_advsimd(float64x2_t a);
svfloat64_t Sleef_rounddx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_round with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float32x4_t Sleef_roundf4(float32x4_t a);
float32x4_t Sleef_roundf4_advsimd(float32x4_t a);
svfloat32_t Sleef_roundfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_roundf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float64x2_t Sleef_rintd2(float64x2_t a);
float64x2_t Sleef_rintd2_advsimd(float64x2_t a);
svfloat64_t Sleef_rintdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rint with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

float32x4_t Sleef_rintf4(float32x4_t a);
float32x4_t Sleef_rintf4_advsimd(float32x4_t a);
svfloat32_t Sleef_rintfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rintf with the same accuracy specification.

Other function

Vectorized double precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

float64x2_t Sleef_fmad2(float64x2_t a, float64x2_t b, float64x2_t c);
float64x2_t Sleef_fmad2_advsimd(float64x2_t a, float64x2_t b, float64x2_t c);
svfloat64_t Sleef_fmadx_sve(svfloat64_t a, svfloat64_t b, svfloat64_t c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fma with the same accuracy specification.


Vectorized single precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmaf4(float32x4_t a, float32x4_t b, float32x4_t c);
float32x4_t Sleef_fmaf4_advsimd(float32x4_t a, float32x4_t b, float32x4_t c);
svfloat32_t Sleef_fmafx_sve(svfloat32_t a, svfloat32_t b, svfloat32_t c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaf with the same accuracy specification.


Vectorized double precision FP remainder

Synopsis

#include <sleef.h>

float64x2_t Sleef_fmodd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmodd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmoddx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmod with the same accuracy specification.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmodf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmodf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fmodfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmodf with the same accuracy specification.


Vectorized double precision function for multiplying by integral power of 2

Synopsis

#include <sleef.h>

float64x2_t Sleef_ldexpd2(float64x2_t a, int32x2_t b);
float64x2_t Sleef_ldexpd2_advsimd(float64x2_t a, int32x2_t b);
svfloat64_t Sleef_ldexpdx_sve(svfloat64_t a, svint32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ldexp with the same accuracy specification.


Vectorized double precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

float64x2_t Sleef_frfrexpd2(float64x2_t a);
float64x2_t Sleef_frfrexpd2_advsimd(float64x2_t a);
svfloat64_t Sleef_frfrexpdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexp with the same accuracy specification.


Vectorized single precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

float32x4_t Sleef_frfrexpf4(float32x4_t a);
float32x4_t Sleef_frfrexpf4_advsimd(float32x4_t a);
svfloat32_t Sleef_frfrexpfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexpf with the same accuracy specification.


Vectorized double precision function for obtaining integral component of an FP number

Synopsis

#include <sleef.h>

int32x2_t Sleef_expfrexpd2(float64x2_t a);
int32x2_t Sleef_expfrexpd2_advsimd(float64x2_t a);
svint32_t Sleef_expfrexpdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expfrexp with the same accuracy specification.


Vectorized double precision function for getting integer exponent

Synopsis

#include <sleef.h>

int32x2_t Sleef_ilogbd2(float64x2_t a);
int32x2_t Sleef_ilogbd2_advsimd(float64x2_t a);
svint32_t Sleef_ilogbdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ilogb with the same accuracy specification.


Vectorized double precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_float64x2_t_2 Sleef_modfd2(float64x2_t a);
Sleef_float64x2_t_2 Sleef_modfd2_advsimd(float64x2_t a);
Sleef_svfloat64_t_2 Sleef_modfdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modf with the same accuracy specification.


Vectorized single precision signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef_float32x4_t_2 Sleef_modff4(float32x4_t a);
Sleef_float32x4_t_2 Sleef_modff4_advsimd(float32x4_t a);
Sleef_svfloat32_t_2 Sleef_modffx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modff with the same accuracy specification.


Vectorized double precision function for calculating the absolute value

Synopsis

#include <sleef.h>

float64x2_t Sleef_fabsd2(float64x2_t a);
float64x2_t Sleef_fabsd2_advsimd(float64x2_t a);
svfloat64_t Sleef_fabsdx_sve(svfloat64_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabs with the same accuracy specification.


Vectorized single precision function for calculating the absolute value

Synopsis

#include <sleef.h>

float32x4_t Sleef_fabsf4(float32x4_t a);
float32x4_t Sleef_fabsf4_advsimd(float32x4_t a);
svfloat32_t Sleef_fabsfx_sve(svfloat32_t a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabsf with the same accuracy specification.


Vectorized double precision function for copying signs

Synopsis

#include <sleef.h>

float64x2_t Sleef_copysignd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_copysignd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_copysigndx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysign with the same accuracy specification.


Vectorized single precision function for copying signs

Synopsis

#include <sleef.h>

float32x4_t Sleef_copysignf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_copysignf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_copysignfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysignf with the same accuracy specification.


Vectorized double precision function for determining maximum of two values

Synopsis

#include <sleef.h>

float64x2_t Sleef_fmaxd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmaxd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmaxdx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmax with the same accuracy specification.


Vectorized single precision function for determining maximum of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fmaxf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fmaxf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fmaxfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaxf with the same accuracy specification.


Vectorized double precision function for determining minimum of two values

Synopsis

#include <sleef.h>

float64x2_t Sleef_fmind2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fmind2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fmindx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmin with the same accuracy specification.


Vectorized single precision function for determining minimum of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fminf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fminf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fminfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fminf with the same accuracy specification.


Vectorized double precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

float64x2_t Sleef_fdimd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_fdimd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_fdimdx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdim with the same accuracy specification.


Vectorized single precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

float32x4_t Sleef_fdimf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_fdimf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_fdimfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdimf with the same accuracy specification.


Vectorized double precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

float64x2_t Sleef_nextafterd2(float64x2_t a, float64x2_t b);
float64x2_t Sleef_nextafterd2_advsimd(float64x2_t a, float64x2_t b);
svfloat64_t Sleef_nextafterdx_sve(svfloat64_t a, svfloat64_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafter with the same accuracy specification.


Vectorized single precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

float32x4_t Sleef_nextafterf4(float32x4_t a, float32x4_t b);
float32x4_t Sleef_nextafterf4_advsimd(float32x4_t a, float32x4_t b);
svfloat32_t Sleef_nextafterfx_sve(svfloat32_t a, svfloat32_t b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafterf with the same accuracy specification.

sleef-3.3.1/doc/html/additional.xhtml000066400000000000000000000241041333715643700175300ustar00rootroot00000000000000 SLEEF Documentation

SLEEF Documentation - Additional Notes

Table of contents

About the GNUABI version of the library

The GNUABI version of the library (libsleefgnuabi.so) is built for x86 and aarch64 architectures. This library provides an API compatible with libmvec in glibc, and the API conforms to the vector ABI. This library is built and installed by default, and some compilers may call the functions in this library.

How the dispatcher works

Fig. 7.1 shows simplified code for our dispatcher. There is only one exported function, mainFunc. When mainFunc is called for the first time, dispatcherMain is called internally, since funcPtr is initialized to the pointer to dispatcherMain (line 14). It then detects if the CPU supports SSE 4.1 (line 7), and rewrites funcPtr to a pointer to the function that utilizes SSE 4.1 or SSE 2, depending on the result of CPU feature detection (line 10). When mainFunc is called for the second time, it does not execute dispatcherMain. It just executes the function pointed to by the pointer stored in funcPtr during the execution of dispatcherMain.

There are a few advantages in our dispatcher. The first advantage is that it does not require any compiler-specific extension. The second advantage is simplicity. There are only 18 lines of simple code. Since the dispatchers are completely separated for each function, there is not much room for bugs to get in.

The third advantage is low overhead. You might think that the overhead is one function call including execution of prologue and epilogue. However, since modern compilers eliminate redundant execution of the prologue, epilogue and return instruction, the actual overhead is just one jmp instruction. This is very fast since it is not conditional.

The fourth advantage is thread safety. There is only one variable shared among threads, which is funcPtr. There are only two possible values for this pointer variable. The first value is the pointer to the dispatcherMain, and the second value is the pointer to either funcSSE2 or funcSSE4, depending on the availability of extensions. Once funcPtr is substituted with the pointer to funcSSE2 or funcSSE4, it will not be changed in the future. It is obvious that the code works in all the cases.

static double (*funcPtr)(double arg);

static double dispatcherMain(double arg) {
    double (*p)(double arg) = funcSSE2;

#if the compiler supports SSE4.1
    if (SSE4.1 is available on the CPU) p = funcSSE4;
#endif

    funcPtr = p;
    return (*funcPtr)(arg);
}

static double (*funcPtr)(double arg) = dispatcherMain;

double mainFunc(double arg) {
    return (*funcPtr)(arg);
}

Fig. 7.1: Simplified code of our dispatcher

ULP, gradual underflow and flush-to-zero mode

ULP stands for "unit in the last place", which is sometimes used for measuring accuracy of calculations. 1 ULP is basically the distance between the two closest floating point numbers, which depends on the exponent of the FP number. The accuracy of calculations by reputable math libraries is usually between 0.5 and 1 ULP. Here, the accuracy means the largest error of calculation, which only happens in the worst case. SLEEF math library provides multiple accuracy choices for some math functions. Many functions have 3.5-ULP and 1-ULP versions, and 3.5-ULP versions are significantly faster than 1-ULP versions. If you care more about execution speed than accuracy, it is advised to use the 3.5-ULP versions along with -ffast-math or "unsafe math optimization" options for the compiler.

In the IEEE 754 standard, underflow does not happen abruptly when the exponent becomes zero. Instead, denormal numbers are produced which have less precision, and this is sometimes called gradual underflow. On some implementations which are not IEEE-754 conformant, flush-to-zero mode is used since it is easier to implement. In flush-to-zero mode, numbers smaller than the smallest normalized number cannot be represented, and they are replaced with zero. Because of this, the accuracy of calculation may be influenced in some cases. The smallest normalized number can be referred to as DBL_MIN for double precision, and FLT_MIN for single precision. The naming of these macros is a little bit confusing because DBL_MIN is not the smallest double precision number.

You can see known maximum errors in math functions in glibc on this page.

About sincospi

The sincospi series of functions evaluates sin( πa ) and cos( πa ) simultaneously. These functions are added to SLEEF as of version 3.0. There are a few reasons that I added these functions.

The first reason is speed and accuracy. C standards include specifications for functions that evaluate trigonometric functions. In order to do calculations for evaluating these functions, reduction of an argument is required. This involves a multiple precision multiplication with π, which requires many operations of addition and multiplication. This is slow especially if accurate evaluation is required. By designing the function in a way that the argument is pre-multiplied by π, this reduction can be eliminated. This leads to faster and more accurate evaluation.

The second reason is that sincospi functions are handy for implementing an FFT library. FFT libraries need to evaluate trigonometric functions for generating twiddle factors that are used in the butterfly operations. Since the butterfly operations are repeatedly applied, the error in twiddle factors accumulates. Thus, we want to make the error in twiddle factors as small as possible. In an FFT of power-of-two size, twiddle factors are sin( πm / 2^n ) where m and n are integers. If we just use the usual trigonometric functions defined in the C standards with the same precision as that used for butterfly operations, we already have error when calculating arguments, since πm / 2^n cannot be represented as a floating point value without error. On the other hand, if we use the sincospi function, the argument can be accurately represented by a radix 2 FP number. Thus, we can calculate twiddle factors with better accuracy.

The third reason is that sinpi is needed internally to implement gamma functions.

It is a soup ladle. Sleef means a soup ladle in Dutch.


logo
Fig. 7.2: SLEEF logo

sleef-3.3.1/doc/html/apple-touch-icon.png000066400000000000000000000043321333715643700202200ustar00rootroot00000000000000‰PNG  IHDRhÿ‹ pHYs5Ô5Ô^eåPLTEþþþ•••¼¼¼oooÞÞÞIII¨¨¨5r!hIDATx^í™»w²JÆ7È@+j¤Œ¡UñÒb$¦ˆ˜_Ô´B.þûß Ã0ÄKâûIqÖYç)Èà·ž}™ &pQÿ Ùf5 EbàòÇðÿäÈvb¤ÂÞÌuß´-(;¢-¡†6_z°âËuáÀNt«°Ô7>ÜxRìºã§ DW»9šÐΖ;×`I~±£¿" ä“r=€ÀÈ®»¦wr@“€`倣ìzC’)èÐ&•$ ˆ]ÈUË@i⨸ à Ûvtý c=Î(hv‚X€:$å ”ƒòhf à,¨#@ªÁ°ˆÌÒõ¤e'ÕÁ9¢—3`êòÈìäÈí H©äŽÐ€ƒî‹š¥ ÄsäéÊ<Õ¼¤h&jf뺡EÕéô.@j äB™CA¢HËíÏ€d·¡™-ó‹OQr­\¢ú1ˆˆälÉA´ÇydºŽ«I=á#Ðй£±??"G'j€Ð!mOBC±ÈQG„¦¸¥ÈˆL(é…$üRN’½phÍô¢f"O—Ê/Ÿ–_Ô,Ñï šä—àt¾!2<  q>7ÌT3ˆÞ”ƒ€õ¹‹cC€ÜŒ”5( À+0$@±14É®ÅeG²0$@l܈ª)¤Ž)h˜{â©¶ò’ P[<â‡÷ Ôã³Jc“µVÉvšÚCI®Ûǰ¤ˆW×ý†÷®K,ãn~¸c@×m„¡»&m·?<ÕVÞÔBãw¼£­º\r±\8 ÇAžj>Ñ„ºð2ßÜe§¹Äª ‹b 2=šeC·Š”Œ•þñ¡OK_‘!£ C +ýZØù7 1CfE†>+2”âj ÝJ I •:g3Î7Ýr{¡“T·ÏrÖso6\º&Ô†ÚΣóvht¾ÃjœÍ´ºW<5‚™ÌaåÔê!Œ.ºéZyJÓø„ú ›(…0솆.•^b  § jQì2¼ƒa;Zm&_+¯³ŽÑ†¿W;K?c ÚIË g ²A_Š>[ZO‰îæà±Ú;˽ˆšu– s$@;)ôÕS<º{fèà4.m²zT{c –#“ƒÚ¯¤N13”&6\P€åÌ‘ÒË@R”¶sÀù0Ñ(HîsC©yôöe¾÷¥q×ûêËñÔ€æº=ŠÍñ½·šÀ®Õ_ ¶ÌÐá§M&›†(„ìÐEˆCÜíÒ&qvÎc»ãÖ]/L6ªoÝõRŸê ÆP5†®3„+2ô1¸Ö ? 
õÌ+'cãç µèÊQmül¾†æÇ3´îë¿dÈJq.m¥H­wZÑ/†~-½ÕöZ/À?gˆ–þw)~!VúëÝj¤H!9 ð­†`¹†ÖæO·úÕP’+2²ÿQ†PUZü[ AU†d:«‰,5+Ju5†^飣 ÙV5©F©Ñý‡E–¤•päT¯*2Wr*Š ¥ú¾šU™4ª‰ÌÖ?+ŠL¯¦Ö?,E²ÜÁ‰|Õkub™H‘äë>0uêÀ%õ.@œD':ty7:zAÝA¯Uò…‰³$dµ¦:A–néÀ5™ß_Ï£ª]¨¨åû¾Î•bpt=i”@ž²ÖÞîÛ¦êÍÂîÛx¶yˆü¯Ú BŽî·H¨”ëY;Bð|ú"˜X+^Í”{J¤zs¹œª×T÷§þ1©.-"˜k8\P€ðCÄ@Š×.ƒdÝ8Ú ˜8XÂCB,Ôp¾> Ъ%.‹€Š\ ™%òÔ7˜HÌ(^‚£ét*Q}‡+OëÁp9R=õiÖí?lkÚˆä( Æ„¡ àÒ2{&ÜßÖš¾ìï?òó‹ñÒ°î>3 LH/ƒÄÚ€k4€iG F’ÿ¼ 4·§EdSrS ”:ôغ„ÄoËÿ²í 4ýÛ!"·üi94ö“ t÷— MoùßAŽõ „‚¶÷;Hâ»L8úúê·Jôû_|ߟ~9 $Øã)(À§ ©ÿìœÕ dáE÷€€x6Ð{ ‰)wmÛ¸µží¤ ¢ŸühE@êç۪ߜ6Í•= ÖÍn³¡~´a<Ã4%¹þrhÏAØ€¾ ­ÑTŸÒiD¿»ªx ƒEmŸýØ"7[å>:õšù¾QÐÎf Hú0 g<àŽèÍ¥$¥' ”hÀ@k öÄ Á¾H6Û…9È •u°Æ@sxU3Ð;PÄÅž¥#ͰZ›´;æ}4ôw«4|\MVƒÎVÝ<=üyßÖ6Ázn1|ˆ-’Å–ˆæ&qR|Wx´À€}B.”°ØŽ˜#dÀUz¬—ÅbKI@B¾åžÝˆåkO©r¬ÿV ^+a‰Uÿo%'|‹ Kd à¿!«å³>ä=pšk™?æ~|§y%µÎ7«ˆì(×ÍîʃB –ëgcósK–ˆÌ„oê`$n½øNÃb3RÛ9‰L€>•í}´šî°ì?‘wùòº½Eþô86çðÈÈØ¢‘Ö“FªàI`J½šàyØWêK(„Xlº‘çÈ?­YÇf ˆÎ§ÀÔzMP=6ÊŸ¤[”ìô™ÖÁ]ÚPP›`6²%ö$) ™§ (1Ð;¨uÒ  eKänáèÌww‚ž,GÔÚÖS÷<‡j>@IÈÑK–Nÿ[E*ðõ8]>ß?Êó½¶Y>#ÿIömßiÝì®Û¾‚ çºp“Ã6oÂB ûp°»þúB©2*wÒÃÇIEND®B`‚sleef-3.3.1/doc/html/benchmark.xhtml000066400000000000000000000121401333715643700173470ustar00rootroot00000000000000 SLEEF Documentation - Benchmark Results

SLEEF Documentation - Benchmark Results

Table of contents

Benchmark results

These graphs show a comparison of the execution time between SLEEF-3.2 compiled with GCC-7.2 and Intel SVML included in Intel C Compiler 18.0.1.

The execution time of each function is measured by executing each function 10^8 times and taking the average time. Each time a function is executed, a uniformly distributed random number is set to each element of the argument vector (each element is set to a different value). The ranges of the random number for each function are shown below. Argument vectors are generated before the measurement, and the time to generate random argument vectors is not included in the execution time.


  • Trigonometric functions : [0, 6.28] and [0, 10^6] for double-precision functions. [0, 6.28] and [0, 30000] for single-precision functions.
  • Log : [0, 10^300] and [0, 10^38] for double-precision functions and single-precision functions, respectively.
  • Exp : [-700, 700] and [-100, 100] for double-precision functions and single-precision functions, respectively.
  • Pow : [-30, 30] for both the first and the second arguments.
  • Asin : [-1, 1]
  • Atan : [-10, 10]
  • Atan2 : [-10, 10] for both the first and the second arguments.

The accuracy of SVML functions can be chosen by compiler options, not the function names. "-fimf-max-error=1.0" option is specified to icc to obtain the 1-ulp-accuracy results, and "-fimf-max-error=5.0" option is used for the 5-ulp-accuracy results.

Those results are measured on a PC with Intel Core i7-6700 CPU @ 3.40GHz with Turbo Boost turned off. The CPU should be always running at 3.4GHz during the measurement.

Click graphs to magnify.

 

Performance graph for DP trigonometric functions
Fig. 6.1: Execution time of double precision trigonometric functions

Performance graph for SP trigonometric functions
Fig. 6.2: Execution time of single precision trigonometric functions

Performance graph for other DP functions
Fig. 6.3: Execution time of double precision log, exp, pow and inverse trigonometric functions

Performance graph for other SP functions
Fig. 6.4: Execution time of single precision log, exp, pow and inverse trigonometric functions

sleef-3.3.1/doc/html/compile.xhtml000066400000000000000000000272351333715643700170600ustar00rootroot00000000000000 SLEEF Documentation

SLEEF Documentation - Compiling and installing the library

Table of contents

About CMake

CMake is an open-source and cross-platform building tool for software packages that provides easy managing of multiple build systems at a time. It works by allowing the developer to specify build parameters and rules in a simple text file that cmake then processes to generate project files for the actual native build tools (e.g. UNIX Makefiles, Microsoft Visual Studio, Apple XCode, etc). That means you can easily maintain multiple separate builds for one project and manage cross-platform hardware and software complexity.

If you are not already familiar with cmake, please refer to the official documentation or the basic introductions in the wiki (recommended).

Before using CMake you will need to install/build the binaries on your system. Most systems have cmake already installed or provided by the standard package manager. If that is not the case for you, please download and install now. For building SLEEF, version 3.4.3 is the minimum required.

Quick start

1. Make sure cmake is available on the command-line. The command below should display a version number greater than or equal to 3.4.3.

$ cmake --version

2. Download the tarball from the software repository or check out the source code from the GitHub repository.

$ git clone https://github.com/shibatch/sleef

3. Make a separate directory to create an out-of-source build. SLEEF does not allow for in-tree builds.

$ cd sleef
$ mkdir build && cd build

4. Run cmake to configure your project and generate the system to build it:

$ cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
	-DCMAKE_INSTALL_PREFIX=../my-sleef-install \
	..

These flags configure an optimised libsleef shared library build with basic debug info, to be installed under ../my-sleef-install. By default, cmake will autodetect your system platform and configure the build using the default parameters. You can control and modify these parameters by setting variables when running cmake. See the list of options and variables for customizing your build.

NOTE: On Windows, you need to use a specific generator like this: `cmake -G"Visual Studio 15 2017 Win64" ..` specifying the Visual Studio version and targeting specifically `Win64` (to support compilation of AVX/AVX2) Check `cmake -G` to get a full list of supported Visual Studio project generators. This generator will create a proper solution `SLEEF.sln` under the build directory. You can still use `cmake --build .` to build without opening Visual Studio.

5. Now that you have the build files created by cmake, proceed from the top of the build directory:

$ make

6. Install the library under ../my-sleef-install by running:

$ make install

7. You can execute the tests by running:

$ make test

Build customization

Variables dictate how the build is generated; options are defined and undefined, respectively, on the cmake command line like this:

  • cmake -DVARIABLE=<value> <cmake-build-dir>
  • cmake -UVARIABLE <cmake-build-dir>

Build configurations allow a project to be built in different ways for debug, optimized, or any other special set of flags.

CMake Variables

  • `CMAKE_BUILD_TYPE`: By default, CMake supports the following configuration:
    • `Release`: Basic optimizations are turned on. This is the default setting.
    • `Debug`: Basic debug flags are turned on. Optimization is disabled.
    • `MinSizeRel`: Builds the smallest (but not fastest) object code
    • `RelWithDebInfo`: Builds optimized code with debug information as well
  • `CMAKE_INSTALL_PREFIX` : The prefix it uses when running `make install`. Defaults to /usr/local on GNU/Linux and MacOS. Defaults to C:/Program Files on Windows.
  • `CMAKE_C_FLAGS_RELEASE` : The optimization options used by the compiler.
  • `BUILD_TESTS` : Avoid building testing tools if set to FALSE
  • `BUILD_GNUABI_LIBS` : Avoid building libraries with GNU ABI if set to FALSE
  • `BUILD_DFT` : Avoid building DFT libraries if set to FALSE
  • `BUILD_SHARED_LIBS` : Static libs are built if set to FALSE
  • `SLEEFDFT_MAXBUTWIDTH` : This variable specifies the maximum length of combined butterfly block used in the DFT. Setting this value to 7 makes DFT faster but compilation takes more time and the library size will be larger.

Compiling and installing library on Linux

You need to install libmpfr and OpenMP (libmpfr is only required to build the tester, and it is not linked to the library). In order to build the library, please change the directory to sleef-3.X and run the following commands.

$ mkdir build
$ cd build
$ cmake -DCMAKE_INSTALL_PREFIX=/usr ..
$ make
$ make test
$ sudo make install

In order to uninstall the libraries and headers, run the following command.

$ sudo xargs rm -v < install_manifest.txt

Compiling library with Microsoft Visual C++

You need Visual Studio 2017. Open developer command prompt for VS2017, change directory to sleef-3.X, and then run the following commands.

D:\sleef-3.X> mkdir build
D:\sleef-3.X> cd build
D:\sleef-3.X\build> cmake -G"Visual Studio 15 2017 Win64" ..
D:\sleef-3.X\build> cmake --build . --config Release -- /maxcpucount:1
Note that parallel build is not supported on MSVC.

Compiling and running "Hello SLEEF"

Now, let's try compiling the source code shown in Fig. 2.1.

#include <stdio.h>
#include <x86intrin.h>
#include <sleef.h>

int main(int argc, char **argv) {
  /* Two (base, exponent) pairs, one pair per vector lane. */
  double base[] = {2, 10};
  double expo[] = {3, 20};
  double result[2];

  /* Load both pairs into 128-bit vectors of two doubles each. */
  __m128d vbase = _mm_loadu_pd(base);
  __m128d vexpo = _mm_loadu_pd(expo);

  /* A single call computes both powers at once. */
  __m128d vresult = Sleef_powd2_u10(vbase, vexpo);

  /* Copy the two results back to ordinary memory for printing. */
  _mm_storeu_pd(result, vresult);

  for (int lane = 0; lane < 2; lane++) {
    printf("pow(%g, %g) = %g\n", base[lane], expo[lane], result[lane]);
  }

  return 0;
}

Fig. 2.1: Source code for testing

Fig.2.2 shows typical commands for compiling and executing the hello code on Linux computers.

$ gcc hellox86.c -o hellox86 -lsleef
$ ./hellox86
pow(2, 3) = 8
pow(10, 20) = 1e+20
$ █

Fig. 2.2: Commands for compiling and executing hellox86.c

You may need to set LD_LIBRARY_PATH environment variable appropriately. If you are trying to execute the program on Mac OSX or Windows, try copying the DLLs to the current directory.

Importing SLEEF into your project

Below is an example CMakeLists.txt for compiling the above hellox86.c. CMake will automatically download SLEEF from GitHub repository, and thus there is no need to include SLEEF in your software package. If you prefer importing SLEEF as a submodule in git, you can use SOURCE_DIR option instead of GIT_REPOSITORY option for ExternalProject_Add.

# Example top-level CMakeLists.txt: downloads and builds SLEEF through
# ExternalProject, then builds hellox86.c against the installed copy.
cmake_minimum_required(VERSION 3.4.3)
# A top-level CMakeLists.txt needs an explicit project() call; only C is used.
project(hellox86 C)

include(ExternalProject)
find_package(Git REQUIRED)

# SLEEF is cloned, built and installed under <build dir>/contrib when the
# libsleef target is built, so none of its files exist at configure time.
ExternalProject_Add(libsleef
  GIT_REPOSITORY https://github.com/shibatch/sleef
  CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/contrib
)

# The library is found through a link search directory because it is only
# produced at build time. This must precede add_executable() to take effect.
link_directories(${CMAKE_BINARY_DIR}/contrib/lib)

add_executable(hellox86 hellox86.c)
# Build and install SLEEF before hellox86 is compiled and linked.
add_dependencies(hellox86 libsleef)
target_include_directories(hellox86 PRIVATE ${CMAKE_BINARY_DIR}/contrib/include)
target_link_libraries(hellox86 PRIVATE sleef)

Fig. 2.3: Example CMakeLists.txt

sleef-3.3.1/doc/html/convention.png000066400000000000000000000642041333715643700172370ustar00rootroot00000000000000‰PNG  IHDRDðõÂÛH0PLTE±QtÔŸÏÖØÆíØxxxŸŸŸþþþOOO’ÿzzÿ»µµÿLLqÁr°Ü>J`1hIDATx^ìœ1oÛFÇHÓzÈrŒÌ”hM,D ‘Ü–zóZ CÖ˜wöµ@`eëèà~œlý àí4¸€6²û/RÔU'>-KÿÉ2!êîô~÷Þ½÷(¦·[²Ÿj{º¾ÆÇ!ç¡=ÃÞEý¥·]ýáL¯©Ÿu•¶щ1F;(äü®+5⹌bØ»¨+v£·[‰1wz=ýÁnž!¢×Žˆr [Çä˵ñ]Eô+{)ôVˬ¨yÈç&¦ ™¢214ˆ"§ûƒgDSTä€(uG4æM!ªp·Ö•œQ:­]ÐT˜ ‰%Ÿy s¡[DA(¢²*n`Í/@ª´Ö}3×7¢–ås@T¥8Æ8¯‘ Òzºè=¢*£¥jG ÆW£fœhGe©ƒŠÝËCßB’§r…Ö2?“ D\ª‹(ºü"*‘&Ó2)»CiGTšM @æ¨G†h\ËV/àŒê#ª8 ¢ ½<ÚÇUTÔ‘.&ÐÅQZ›+ä7Ó•=\r‡;¢CŽè[Æ>ztg§­;zlQi_9ÕEaQÀJ"*j”ƒ¢1¢ŠW!‚ ¢ t»…ÙˆÒQôÆC>nŃ’uÙƒäé}$j×EQ þÉ/¢§<(mR²øºº&u/6h]ˆKAÓH“"ª D€hH†¨ «U‹ÿ@¤ Ýâ#¢B7,µè5d9Îàu…¾zBÓº€Ã¨ïŒnáEi´¶z’ÊO ‘Cw‘åÛ¦Cô¦Ú2¢1'@>rN©¥/KJ¨8–6Œ/>Zè“»RàV°.[Iþ—Ü…uá0zàQ]ð’¥8BÙ¼èýH›!ªBôÉxÑwˆvLl‰JD,Õ³äÊ?ã(Ú &ekŠ4ôÁæE$Z}‹Ê(õ§¤´)Ù…uAÖûú¹#ª8% ôÃ*òÀR&WXDôÒÿS.“¢×D¯áCIEeôÜ3¢–Ð^ZÝ ²kNˆâË~Έ,²‡ëÄòÄŠ^-lÑ÷øø¯*gG–@ "D;´ˆèõzˆ"¹VÑ7ÏÑ0 ê=ÊÒ]ž˜Â½q÷(²EÚ·ÖCõ zD5òE-!j,ˆ–.±1¢üù#*3'JÓŠ‘¡‰U——$y¾È¾´ô Ž<Ù ‡Ñ3Æž¢–=jÅìE¯A¤ç8gïÿUž•Œ½zˆÞYÂ<Ñ¢_‹ˆí§óQâÇT?™Wq…¢«g™Ø£™=&RQ<õT55Ê~-w#Dqƒàh¬Wi|Ú ùÊ_ä=¬+0ìrÿÕ“;]L¬¸ ¬^:5î„ùèèE¿Žä0Íû —sh2«qBôÛjrk¥#‡ÁÑO‹ýEòïƒß6DT¦ÿ1wƼq#W‹“®µ€ê•l›‹ °€U$ â’.mŠàšÓ’¹ME›.íuùWˆÊ .€å®s¡ÒA\8¸Ë:ÀÝ*Zzµßò=>ÎLÈ%=—šáìr<ß›÷Þ¼a÷J\ yëFÔ·z)þ öˆ×Gô§æžå´ÏnS½ÇÙmLeÏêätFÅšï>¿L}WšË>*d0׬ D#ËÇ£EƒÕ[e=ô‰™Ä—ãDôózÛ¹ˆŸ½)†¼O݈ºsàžWÎôlÆñPD—¿š0™mÄáˆ.#Ž…ì'H?Û\äFtR9|û4®aÑ…•€Áh¢t@%³ð ÕeŒfÔ=Ô$æznÒ ¢Q¢a%pzá¤áŒÀÄzñ\‘ÀÜAwé¸p^N@öÚm.²¥fê³õDƒLº$3Úø Q6ðÈfíp@7¢|Aâ³h:´üfëÇæ†Èt•ŸPDaÌF”ZÞ…weW}W?ŸöM z2'=ÑQá‡Ó@÷  XÓ¢«×dáE턚*d2oD±•”ó £þˆê5øsD?¬(#pÀgÛŸ‹ Ö¦ªõŠ¢˜ÑE‰9[Íô) è BTÓß³…C-&­ï"R}DG ?­â.€(g÷ U|\®ç"…IŠ2n ÑëjªBbð1° Äv úe Etøå±°šºhÒ%ºycö_5ÓÛ¶GÖ{&wý÷dè¼I _4[½éhPŽè¶•—äpUr(Õ9ëF41ÿpÿÛ²ü—Ê~é‘'è¹öJ·èÍ„ Lp<8¢¿Ó’Ý’Žþ rƒ{° ÑÇ—wåéxÿr^îÿø³|';ÝÔ@”G–¹†j´…kSßp„FÒ¥»hËž­X.z^…˜‚W!ÃÓÙˆB4ÏÕFPÚbÏ]3¢£2D¶ÂÂbR[hÑtSþ_ 3<:Þ@”üD\Réðõe¦¥¡#º|R…èÏ//ÿ=~ü||5'ô™È{ˆ_´¢ü¦G`;ºM)êy-R°Ù§:8f+ ‚ {3,Qf¸Q¾ËTÈš#*©nQŸÄj LŠKlº=w[D.4ƒèÅ«:0HM&qí]^¬;°M__~›Œ³ËøbŽè󲨅ô·?þ͸¢\¹ã.†Tð‡éí^R6TÿYQÅ:2ÐC'>„Qs¶2)¸”fG.ׇ(fua·ÆÝKQ~«d¡J罫&êÈ¿ÖG”+е´4ÿÝíÍ£Ri“ÜÇ¡îUGšˆÎ…ç?ïþùìQ®çþ_¬Âk!ŠÈRèí‘Á}…pø†p1h^  2 ±Æl5"ܘrº!J;7¢ˆtÓ¯K[ï¶¥(^c[© š¯AI+ì¹Íí… 
¬@–MÄ.¸O¦{¥>—Û27ëÐ;‡Þé—ó–û_刾ÂPÖ4¢¸H¸|cÜtmx8¿×šþ …`µàF×ÿÀ˜­føŒíá[¢p§–eöºu°ND¹vÜFh.F`=hØtOJU,J”ïB —ÆœAEQ‚\¥±_Œbpñ@ôb±þÜÿ&GôÅ ¢5…¨­G\{m^™i ‘·ÊÏÊ7{!šõ5£úÍ>(Ÿ”‡6"Í#ª%gF•j‹žÛ=¢äO)õl.5ÖÖs©Á;XQ{)ûl^™XIç"bjÂ`#zz/9ÿx™—p£ˆ¢—ÚÎ%wª …B+Útô!¸{Dñgò&£!%OŒ.x¦µ šêùõÖ!¢jŠk!‚!Éѳ;*¿ƒè3~^ûˆ‚]U”ÖÔ@”Jž×(sGǾ=œ[†WáKWQ(¢tâèÀA='#ª2Lâ‚VÛ¦ž›â”ѤUD‡š¥s-'4¢T2݈æá _¯ ú|­RT¯Æ5¢³òÝ;Yn·æ!yy-öÇ'Å€-Ú9%ò™fHÆ“%â”r†(ñIþˆf†‘¶"„þ=C”R¤¢éz.±¢Çë#ª šщ‘-+Ù[n¥þ—gUȈy%êÆhöΈ ¢Þ(¢ü,jM‡®ë˜_²\¿ñ¼=EŽîú#ºm"ºé(ßy#Ê ÂþeÄDm¿wˆj ½¯-ÖD¡¬ QÞìÔš11Žc~Aô¶p%4J¢§q÷ñ¹¬ ÑOô¥ÏP”¬ ’bÐ>5‰¢LgŠOPeK“(!ˆÒIä/E¹îW"Êt~ÏEà3åP[D$楨¾ !zwȬè(„_Œ%D¯Þ-EAôŵ/)™=D¿±ŒâÄeM¤(Gp¶(_HQ7¢ÄwèñNž|-Ï¢Ú·¬Õ ªÍ#ê¾D®€ ŒÈò+Nèpæ'E³×,?¯ð¹t‡(BÔ+µ70|1ÖsoD1òF´g#š"š‘Ë!Qj¨±t¦Ëá1=Dµ¼H"Šõ9@×8 ==[Y~^àséQ÷ÐÂê_Ó¹¨ƒÑò#€7M)zÒ€Åë#ºi#J,k·€©~Üý) ‚ôJ´+DÝ2ƒ¯Õîzj¬¶˜ˆž¾ûŒÏ¥CD‡ŽÔˆ„6êX;„#Êèù"ÚÑšR9½Û(¢ü”.¤(Ÿ¢p©m^ZCW»_»D4ud—%:œ¤ž„èºmŠÏ¥CDg®@× à‡#ÊÔè@ŠÂèvƒˆf#ŠŽRrS¿ÑÄ [èQÂ\k1Ý÷Ljf+ºí>—î=÷€´ Ña-D3æí"J pÜ¢ìýê.ë‚^¬—´;°Vß‹Ÿ·SD}ŽáV š"z¶"8Oñ¹t„¨[ˆ")†G”Õ›F”Æl5ë‚mÑ“neD%¢€iÞ‹!JtQ ˆº…(’Rw@0D¢šSù5ËR|.Dµ‡¨{”Ü ¥&ÑL„;O85„(Œöj[t©++-f$-Ûñx™}M3ÓšÛÑtU}‡ˆºe’’4¢Ý}öpãsé QrŸûœjDé1QJßB´g#š4(Œ& øE¡#Ñ^sˆ’– ‘*s­Sœø!zºF4nQÒm9 ¢èÑÑ ºÚç’¶Žhê#D‘”ÄéÃCØÑØ ¢0:è Q· ÎF²Íß¹ˆ§j´ÀLx†ïÚCtè¢HJCD¡ —Ò ûLœÑ¢8”‚úLM)š†"ʫݣKÉšB”EXä‡(¿·S)j'¥`÷¹º‘¡ãn¡Ï²ìÔiÕ4¢/Z@Ÿ|HŸCKŠºctÁkQÑçrÖ&¢œ•Œh,ë¼7£ÙXiD#ªA#ˆ¢v‰h¿9DÉÛb#:]Žr!ŒH˜ÛuùIˆrÜt0¢×ªÎk3X~»j-úZÖ¢¤›ðî•bíˆÚ°Ñª>¢H?DIÀÐ)¢vö5>é1uiE$G1cƒž}~[@”Œ=!}²* G4ÇÝöJù\þÐ"¢Øö ¢lÇ5Ó$¢±‰èVcˆ²[Û(íãÆ¶pDÝN!ndù€Q\¢ØWÂU¢Ü|]û’Û€>±mŠMâXK\ˆî¯è¶—yy*ÎnQ÷è§Æ,F ÛE'³çtƒˆfˆºÓ‹© /)„(F3D¹ŠøGø¦ó¶'Á’¾DÙWåoÑų ëRŸôbWè¶§äÙè[C4õ€O DQýIuÚ¢=‡fZQ¾Õ Q>F #š#Š® ±UˆÒ a*S’_̶s¦K ˆ2¹\%3e–“-Úèg ºOn1q¦K[ˆ"ý£‹Pl²¬lk[tyƒÛši‡ˆbC©(Ç_„#J38·EcÑ ‰¦‹ÌµÏ“oQÈó.B±¥N­lmD/ˆ\¸È??“†²6ù ®&zÐOÉKª¾_Ô>/g]gˆÂT}DY†"Н†ÑW™‹¨ Ú˦Kcó<ù6xɼõô f!zžQˆ8û†ó\„G¸5D}õÜT É%^ÖZÑE=ÇAÁ$Oïb-Š+µD‘aÁˆ²„Eç¨tº`/"͵P äO¡…SºÛ@”Cn}ÃèÙ *.ñ²z#ºÿŸ‹}l Ñ4äs"•½Îk£Ëéäã0·0vbÑÅ[¸Û¢˜ãPD1ys(UuèºëˆáC+Ñ/¿Œfªürc£ D‡jÞu×DÙDÉkä‹èë=>ónúá¸ED'žï(TxÞJâ zéÒ(„ ’BÍxLI ÑW1E4K„ ñC  ¢¼qBE?¶ŽvÜÕ7rÖ"ÕÓ‰Ðch&ʯÑ÷Ö‰èÌOÍà ‚b'® —.ˆž>;½÷¹<^Ñ𴆨}„¨#8Q“ìw§ŠK Ñ,2w;.f™K 6ë‹S³wÇ´ Et´ùˆ›ý£‹/BÚÔF”§É|۬w’Œ=°hõ% :“¥ûfFxÑúå³· ykž… 
Èe9¢¯ï•Û«WË­h+Úɰîn¬Ñ¡èA,¬ÑbªÇÉ‘†X#Ê$€ÕÉÊtÉ/石»vÛ2}ßñ¼erܧD7wæwgÇýÐ]öœ=ÌÙñQ¿¢<èNBo^ˆ®4<’IÚ£˜eOæ­äÊŸ—¢ÂMD­Ø…õ#šÚˆ¦72G¥Aì¤,öML&÷R‹î7ãñ—ñþÂ-zúª4ráó6U…'¹)¤)•ùÂï«Dšð™Ù#xÌá*Úc"æ…)B¡ôR_DiG Üé¢óÔ@â)nDá2YfØÝ„¹–ÆÌgY„ctýˆŠBÕ$?I¨@\T§œ´€¡X¡ —/Ò‹ç‹wÔÆ+‚óÓ6Ùˆæ²0™?âqôÔu<?9@±µµT¯[Ȱ¬!zï^«…ìˆ2û‚ö‹BA£ˆŽ‚eÎÂþNHìîDGü * ãÙSŠÂ·6D5O"?Ï»º÷fÆé}{ãqö®26e2óòt±[4~ü|UÁßD3ã9ôÈéˆ]7¢¢DV^Ý‘lg22¨h@ÖðhѬЍ~ˆ^…T`‹ÖúÀ(–ÍýDV˜ÇúɈ]ݣ΋²ˆXVaÉn Ñÿêb<‡>{Ô® FtW¼ã{†°ŒÄ¬®háî( Ft‹å\»f‰tD]ôž‘D±Êˆo»î{²>ôHa½gä#Q\eÀ¤»æüi|©-]ˆ1Å]ôž‘Dq—ŸmºëÌ_ý@Ê­¶t!Ʀ“|Ú{£DZ,ß÷ÙÛàc¨ÎRÔ%úô8ûRYQŽ‚|0LÕ¸ê€A¶–<èÀ†ÈH;! ]5Îýv†ÛÐô<Ÿ§ †êle qžæ¡àœîïrYÒ’"$Ûé½äöú=L»§ÜÊ(È_üµö^{m5¤"Š ‚ Š ‚(*‚(*¢¨ ¢¨ ¢¨ ˆ¢‚ ˆ¢‚ Š ‚ Š ‚ Š ‚(*‚(*ü’þø{:%DQá:˜~L§õ‡tBˆ¢ÂK‡õè®\j4s‡NQTt0^|ˆ-²ÎtLöÀ)Ũ(*L1 Cé£ÚzŒFºGöÐ'£¢¨àÝÒ¡\!¦ýllÇèÜî”/}R1*Š ž:8õF8$Í&p­ÏFZUô”bT<,è0¦À!1ÂjŒ¾€YcÊÆŸ¢¨.À@ïE-ƨ¯mÏ„Ÿ'£¢¨Ðpû¶ÄûµX:Î5p ôl_p³¦Ó@|PƒÃo†éò½…½2T'ü<lµêÒé ŠJˆÞãöÀN¹ÞþYSêÐuic7ô4° QL§‚(*!ª¨¯%ZäïÑ–õ5pŸ8e{µh@S:DQ Ñù¸=Hå½€»7D1$s |OD¾ÝÕ¢ùÉ-‰¢¢tHŒ†Ù5Gºµ7D1&cž*NV`­¯]ÐI!ŠJˆÇèžJtÛb´'DG6ý6ß ²¿Z´ “B•åÝS‰¦ÿ£ÅÎmmÌöEÿ ùð¯é?YY_-ré„E%DÙ»ý•h‚çì Qwd&÷ýô=€ïi£2§ÈS@Ñ©!ŠJˆm0Ø_‰îo¦Ð"Ï̩ؖ¡QúÇnoQ›N QTB”h†Û½•hþÕή5U”tÖK±QƒÄØÙ#<ÉÕ"QTB”j°¯M‰àì QÒP¦Ó?ha}µ¨’=ô¢¨„(‘‡×û*Ñ `P¢æŠ’%$Õ¨—k ]è2Žš.«(*!ŠÞ!gA¹å3Ñõ!Jyñ¸ÇÆhž×¯-¯QñÅ!r¨Ùˆ¢¢š#e…x_%ÊŠvjC´¤h‹ŽCÇyŒF€c··¨K9ºá1*ŠJ%êstzí=•(㡵#D§¼fº:6F/ò]Ú]-zQZ-Ú4=FEQ©Dûyvb½·eEÛ;BÔGî}x¬]os‰`yµ¨ô¸£¢¨,çncÔåé²õ•(£Ñ­QŠøÛ3¸fBÄdˆOiŠ‘(*˹}¬iÞ&šTçɘ+QFéQ ø]rnþ’ Û«E°;·Ì>¢¨0u {¢>Ô5†D>Uáßq%ÊN}ˆÒŒç¢L`£–W‹À)ÚÀE… W¬ñF€³k¸õ{ÑZm8DË£‹4вttÌ~oÑÀùÑ÷bj¢¨0zõ‹B`¸o¤XÙ³ðI›«?æ—ÚÂ袜íÚXÞ±¿Z4]mG:øÔ8DQ!Pôh_ͪÜÇÉÖ£~—ȃûXQ1f1êý•V‹" |A EF¢4KB´ïîˆÑG–yO‡p^Ý­Ðcƒ‡é“[D46o²¿Zh¨15QTb4ïÈ D§NmŒ>Ѩb«Ò0~¬¨§\‹Í@æpýŒß ‰(*N]o¶>éA}Œ>iÐû´I±ÓsˆZ[-ò¿¹~iaµ¨•.Q“E… 7jš¦”Þ£Ã'Ç9ŸŠÌå鈭„è%‘—ö¯óD½éBQ³E…‘“¹µÎÄ«ÑÞã+QÚy×eEáZ Ѱû!ìÚÆ«Ecß^RÃE…Oª´‹úGoóÝ ¸õ›-âãhfÌø`ËvG0¦@•Jtÿm-ôL˯¹}Õ¦*>`ÊÈx'vE–˜jà%ùÚ¡f#Š ©«ß#C(¢iuBδŠ6f·!.,ÎH¹Mïi4¢¨ yXÈÁCk—_Ѩ2!CQ~ž3V”>µ¢ÙK{ÃcTfœt—zU©¨ÕeEíàwj¼9¸Ô`DQ'nùúã\+—ZB›!ʧpl£¡È»M»#Œ(*xÀ€_Q?®ˆ]ÐS&C4W½KE~´4­s¦\Þ6QTÐhg&¢Ï€»mzŽ*¿>y¢÷o@9ß0~dXS£!zÔ`DQA£ó1•h¤ŠL«î? 
ÑQLfl€a1õÍå^ÿ$F'hSãEEQ®Dûã}ñt´N‹ö9F™N1Di¸Öï-î/ÌRtA)¿HSº©!*Š Aïà-ßiÿ~@ÜXâ—Å¥X[¿·xâ_`"48DEQa4(åÓç××_ïR”˜]¿8—æÀÚvˆRˆ…ñÝ¥L“CTüR>} TÏ묮+ûœªLDÔ·8®³jO´o{Q±Gƒ¿¡ˆ¢‚ïr>m€]…™Ÿêöð@[ùáeá`LµÌ·ÂýZ«ïÔ^‹ÝæC4F-[+°s$Œùa›5QTbÔMÏMc°³¿Áר3ÏÏ R^±¹¢k+°—Ôºð°hM Eò›;¯rã"´vÖ„1ûWó‡hù^ðùýñŠ.J!úKÇãÿôp:{¢¢¨¨ô=w˜¥³³«=ÿЭ3Ày$q?U•¦ºmtµ8‡h_ói퉊¢Â(.£$jïýÁí:Ósˆ–¥]:¡]ê1šj¬í4,5QT:ų£>êS”‡‰lv}ËÏC”§ òöæàÈ­’˜ï-æÔ7€ÖpDQa(ÐÊífªÅUýs1‡h-ƒbˆÒŠS@|lSÐx0åÜÓ¦ëÄå+Þ¦ÔhDQaRl ¸ i’ƒzX¥JC´§¢S¶ÉÚÇïß.už{swÿòTnúF/ïÔ`DQa©¸ÁBuw£¡ªU¹ß¢ÔW`'@Ç\,ÿ‡®9÷’Œ6‚‡HL½tN}sE… »ãOD@…/hÀÙ»ƒ1»0¶"րŒîçŸA±¬…Z¢M§€(*вˆŠªùéfOˆò¼2~¡ä¤ãáÝJ]³^Ý‹ü_Õ!Š0¤@•]Ž©EmšU†èûa9F¢¯­„hÀ€G%\›,êò €4ù¢4QTày!•S¥#z„~äÆì•ë?xpËp9턨Úž ß`Q—w‚&Ù+Á15QTàa\K^äŸ*(*2ÃÓq >â\¬[+!J3 Uº- è=ø ŸÂk&¢¨ ¢À ‹›Çñò$½<ˆCtËk2#äæßT|Ünä ‹çÝš(*pˆ®¾§=p½9«ŠQ\‰Fïan.ºöÃ÷çgq_ñÇÓ@•áþÐ%ò^W=ª\‰^š ‹?œ_l”Ñã˯è”E%D‰<¼=0D‰‚«ªuÊ˹@†èÂâq”ˆ¯‡&/ºNéÎòŸš¤¢¨Pî‘_­C”(¬ªFM»2^Ö-ß‹vÓ&¢~—Ž&(žjmmNèØ¨(*!Jz†(QT¹¨Ë!ÊWš¡TffPCúhf5çEf€S ÈøÐ¥U1:,†(EÀ>–|Ž¡ð•z‰WÀš‹(*\_$½ Qš£*F×…åócFh*33Ñ~£òË{|â­±ˆ¢Â ”Ù¡!šlS¢–¹ÉS—¸Û>ubòÌnv‰Ä‚¾4QTu>&D=n˜Ô]Í»bw#Šæ±¼Ižo”{º•þqx>psE…+ô QH—óWUùÇãÅx¹Èœ ŽKÐ2zÐ}`=©±ˆ¢Âøˆ€ë@Š6TA!å&Ö]Žë÷ Ÿ¹‚€{[†5QT˜j8×î!J›ÂwjMécÁ:`MVHã;ÐÊ´«Ð×ùX—®½¨+Š s '®ÑÚù Qí_ì°x˜»³–|m^=þ°¼‹ó›jä7:FEQas%ºz,"\ªd–+zi¬? 
Ë×›ñM5@—Ž(*pˆFß=ì¨E»üuÎçË9ÚZ):O3›gÃ˹ºéŠŠ¢‡èôf»nZMÀ‹µ£ú€9yˆÂµÖ§8ük…(­pA Føn@ßThÕ¬2•I§YŽëÚüì¡Ï¯¿26XðÉ9»!JÁ2¦#Š £ß~ÈÌpÞ׎6˜~»üÍöC–fQ›ž€‹Tç$sßÀÚØ§ö_'D›(*@šw3¤“Æ@{˜çÖéq…OkòGZéÞã†k\ŒÞ*ÑK ˜L1úÅ)5‰¢Â( QUúâÐ #>Ú5[€r³± ¾n“€·ße!ºAÚ}p,Ë88©E%F»YœÞòÙì]ô¡òêôiŒF‰£*N¬O\ž!&3>»Òà ¨ë¬MÆ–PˆŠ¢Â‹ò»!Äìèò[UÏÔõß-—oÜÂEdSÝ"S¦iˆN´D#EG£O)DEQ!Øþuå ~3 i'@'}£ý ý@õêÒ±tƒJ¨loÔ%"C³g ¢¨0ÊOÆÜÄW_ùh¨1…ã&´¶óÀ6ˆÉMZ‰òTÑ–IŒ¶èdE… ·õ©K‡):Æô«mšxY¯>Sè$~ -¬j JÑ&0:ˆ¢Â»ò”èí;•6¸ÌŒÞÔ¯­Š(‰Ñ‘¹}€ˆ —›]Á5[vZˆ¢Âˆûõö Ù@}‹ÌÌ ê¾=OŠÛ$F5:æ3œ/)âã3+“Žˆ°E'„(*”糯ª¥‹–|]>÷lU¶ Jb4‚¹¢ôl¹¼¥è âãÑtZˆ¢‚f/k†kn²oPëÒ^H%ýtÓ4túÖ:÷øI¡Ù3g.¢¨Àï¢AMæy@ö«\ŽJ¨¯Î¶·!=`]õ|E!'Õ¦µ“Ã~ t´;DYiž¢= h€7¬iNç‚(*xˆYƒ˜fŽ[qí‚è¸/Õ›¸ÞÖ(êr81™Ã™=ˆúÖ‚Ô¿Áò5QT!æÞD´RãŠQ™×Ù!Qµ¬¹ Æã^ˆí%GkŠòäß[¢P‘"nÍo&¢¨p…qnV{+ä÷U§sѸõ«¯æñÿW¯‰æ,·¡¢ÌÌR=êkàˇwmÜEnÌ «¯aŠ4ðÖ­hmŸ­]«¯4Oƒjl]QekÃ-qk~Et²Åa£¼x9wª«¾Ë{˜¼hdo÷–gš¤ QƒcT6N&R+ ÈÎ!çòr«ê;5¨3h p+~G2trTV÷¨©ˆ¢B€Q„tÓóòàM#w]y–Ç Ú¹n‘éÃFŒF@w–ü6Y‹(*lpý^XdüÜC”&™Œ íôÝlêýõ+K—s¨^;v¼÷nùÐ\óE:tËw‘A÷‡h½¢>õÝ5Ɖ[[zæ½ ¯‰(Ú¾A=šÙé‡Hbtƒ…(Ú\ßÔKÊ*чoåî Ñ×¼"º¦§\Ù—<@ý–kV£{ ïn0àVz;WÆ$1ꉢ &#øDÄ•èhí ÑTÀqÍä>õ›B%:1wjžÌ;ãòÖÌúmc舢'‚¼òvxÔ_mˆ^P €ý·ÆØÙ$™¾ÓpÆ|µŒ9W©ñÞmIQPóxüVPS<Ý/?ûrЭ1ÝÉ{8d ß›:SÞI(*Š ¼'êÃúïÀå¥ÛöaW¯éŠâÖ|J˜oº×šÿ”Ól!Š6+Q^©«D1ÈUÁâ%òc²G^:¾X›‰Žâ‚©EŽÀ7ƒÎ€aýž(\^eu÷†¨uøµÙïÏ?u‰È[ˆ¢ÍE˜ÇÅ¥Ú>׆(éq®t½#ÁšCÔ2|W8†Æ)Êg6Jm.BÐ}à–ŸU|e‘Þ#¸Yd¦lùôË?Ò|õ'Í!j•+ëà˜mãÄYŒ5¼AF@îeXøz”mØÎvhTvðÛÊ6@vÀ*£ô§ 40´1²Œ¦[E•(Ú\„ÀëÝË@!¶ß™bMD«mˆN=@i´*ÛÛÄX_Ðêºkô¾Üz´Â»E›Ì¼f© äW ýöO^jà¸%zuUJ É6|Wøe2îÌ¥£ñ ˆñ%Š6!@—Cô¶.wn³o¤ºÎÒ¨ôº °6™k¢o7d†Wþ÷Ô€7E›‹0jïÝKbžÊ帅è™À­ˆQ² ß>]Y˜ˆôÀ?L騀–/)Ú`„ÀÙ¢åK‚ý¾ú.M´žÞÞØ©ˆQ² ›¯| õ=£,µðûs“E…Q¼·!ñÓiÛ‰™hWÈ´ ‹¸´ÅƒTlg‰ŒÏÝEI鈢ÍE¢5„Ç妓ÞEOð{d0[|g@6ˆn–¿¥”90æß*MDö†(ùËŠ#—¡Jd­ >ƧՃÅ'¯ƒßüÕz–fJm0Â=‡h >ÙcJbtÐ" ŽAá8H‹ß’eøê ,DÑæ"Œ^}à=¼=T4ê,Ñ:^Ñ!ÑtÅ»­¶Uô§ÀE›´áâã‡Äë…è0¼'ºÄøèÖŠº~¡(¸ÁrltÂ[@“cTF¢>ÐÎ XE;™!ýòêÈ ‘฻R_ÐQ|þÃÔÝo.Q´ÑHŒö¨H4Ø3CÏ¥(“Fÿ7TÅûôGl>zºCä<¼¿IìFo›Ï¿EŒ0Â‚Š„*>Dèpßû±Ÿuó®%ß/k^d§XÐ%—­cú ®4:ÔxDQ!ÀšJ¬0> EIc}è^Ël‘š¦##Vêˆè¯b"zT„úÞXm"ÂhH%BàåŽhtò´¥¬ 6;d›:?„³¦D;&ªM–¿} %)ÚH„ õt(ç÷ÛÓžu°PdÒìã5}$a7}#ˆKWwEÑf"|û$þêSOºþëdÈ6DQ Ñû¯ê/M·Rürù#å<¸–B4“jtlƒáœÒ°`Qô$úHPëÿüþá[õµbEçI»J4ÀbûqHG1B»˜ÉŽ(z 
>€áô¡þ¾Ñ©æƒwKÔ~ºQPºM&pêý'ŸÐqxèñãpµ(zHˆ:[—.vtÆÞÅä\+¯J61M= Mv–s“~:ÅÇmQôDÝsß(=wi?a.Àjë‚îQ`Àý@†!J4›dïðã†ɾèé !J47½†¿ÜŽ'âÔŠ ÆvöD}2@‹Òã6cQôd¥†Mq‘oâ¨Ì ¨¬aˆšÃÒã¦t2ˆ¢¢˜)Ê£°£ü…2ÌÌš g¢æh OvOT•Æ¢ðcýÓ«+DRnÖKg×›©Ô ÇTB45JýÓQQTœΓèð¿·3$ÜV¼5é×¼{´U=!1<µË袗»üõ;á›åòkQ´¹ëôõÙ Àk¾Â©H(ô=q%ª“5ÙÔ°`C•ɲ֔ˆt¾°ëñË@#E…‡¨C1Ï^_ƒŠ}TÿzùÖ¥B%:Clat¾â_oÈ×Êðyü8ç_< EÑÆ"h}ôšiHÝD‡êà÷I}A†èâN›‡—I?;vúbû´•E›‹0j}ôš)y½êÒRéñ‹X2WÓm. ŸÂñÓq¢hc‚î£}þã~a§~c_ˆÚA#¯œ_ó¨Çâ¥KWWYªÛ¢hsFq)DC@Ŷ#ÑîžµC¨ò7lJešÔß½llÚ,éNÜ@m.B0,†ètÿ½f+Voƒ±Í­G»Û0ÅE¦hÇ@x8ùŒÞ-Ïül,¢¨À!:p½{BüÊ8´¢õ¼X‘ÏoØzF=·Ñˆ)‰Ñ>¢hƒZ…îùvŸ„vîB@µ¢SQ[,Dóâ(‡®Ù’®ŠÓ‰¢Þ-Ïül&¢¨0zåå>M“^‡ª)gðk ×Ö‡w….Ѥ8`Ð!>ý­Ks E”ĨE àS''Vv~{˜‹ÂEܧ7Ë—”â«ïXz{ô¹«ÈèàLy¢¨wAm6Âè±OaÝ2Py {Ÿ?>€{–Éjˆ>=‰³1Wô*ýý2S}Q´ùHŒvÊîðkU^8òÇI™ˆûŸð¿C ì+Úå?¹M†l²µ'墆#Œ¶©ùÓW1%LpAuDPœ§]µY[D}5àm‘m¼âu2ÙͱcŠEÑ#ˆ³“iî6azT çPoûVüÀ€h†!Çèšl³JÃ.]8š~mªhœ©¯ÈÇ¢hÑûFgHøšˆvf v²(Ë\ ¼.ïVö1 Ûðö¥†ä㥡¢kJ˜¡Et‰{Q´¹Ao»ÆsýNã%M€Î.S¨ÜŽ@ù˜|¸d¾&bÀ% ñÆDxzoLmQ´¹ÿH`Mä¯ð& ©ZB¸Yˆ¶Ê-øÀ,È2¼ˆµÚvOD&wÅE'p•¡(*DYï·}W㢓z’ÆÐevDl>¾íÛϤiêTfS`4Ç/ºåÿ Wm0Â,K‘é¾á[W*N¼D/¯ »O§|öÇT$º¹{I†xí|—d`<ÇÏ/;©5QTàIí!€íÏ:´úá¦<æcîjHñcÚ­ ]ãÑM¾^z"»c"Ö¦ÚP_Ýç”vˆ‚ì—à “å£éQœ½ |ó[Wm.B 9D“bó%Õ‡eLY%Ьcð³šänmx11ƒZæ3@5ÐÖP±ÑdÁ뛤|±å¥(Ú\N©½Ú5¾2ŠË·ymÚA?}¦î‘_¤³Á€%à_Ô½EÏWÀPm,‡(_o¶‡«´ruv4ºvžiÜ}M©Uotƒdž/q*Ñ©†#Š6Á×pbуN|òÝ ñnI>…Ùg]2äEÖ¾`¾›É˹3 E‹i@å µaú©®D÷( ÒÔ^Ïq­ºå­"3x9Xˆ¢ FˆÞ¸ÈwG/¼÷zaò$ ‹óL&P±…¥X[ Q®ÇŒ(*ÌŠ!Šåõ—ƒý•èaÓ’A±¶°¢Džc-DEÑæ#l€V¢Îµ†U¢tÀÈ@ iÅÚ*ãe,…hQ´Ù+ÑU}=:]oM!f÷ÈÀÐ!/óÒ‡‘ ~¢æD‹bˆ†R‹6a•‡h+ùÏZ“üåÀ?hÑ7ÈB ¯ò]ÌÐ Dg¯m†¨¾wçz²¢Ûp„ 0æÝ}»v_á EßÙã{͈¦F!ª—/ ÁºÏEMF¦Ðæ%šƒÚ·ÍƒDã·Gî{¿”±U–Ð¥½EŽððÜ-„(ÍwtïôÁþ’¢4†ìª©Uöbt\æÐxDQC4QtMõ1Ú£2_V;ÐÉŸË•¨™Uk²†V¢=j:¢¨àó.NwÏûbF_žqæMZ\‰š Aö‡h,Š6¡?þ¥f©V€[o³~¬è¤]ÙDߥ”˜+QBP%Ÿ-Ž¿‘Côóë߈¢ÍE”ævÛÔïí¿i1\‰ c*½"ŠÞa|”ð¯K!h‰¢ÍEhš€®¯¿£:œ'жwÝÌ•¨!açiTãè1›·¥ÕpÞa(Š6!à…MÀÀ}mŒ®‰©+]C€UæJÔˆ7­‹&GG9DC`@—Ž(Ú\„z¢ÑÔÞ¦0m=¥*Éü´Óà¼'jŸ³ªßBt“çI™™Vh‹˜ß QZf?Ûg@wP’©á¸d‚pT‹¢ FÈ/2üãAÎpäFÛÿSA°.†èÇ+0pw¼)˯–2¡ð¿×¢hó‘ )¯¾ñŠº°ţ¸s+†¾G§Qßû ðòÉ´o:©šÿ=|ˆ¢ÍGb´J÷»ÖU)ã*÷?úà%£F]ßœ¸â :ÚdB–‹Ž0×PoK´wÕ˜_°®P¯®+*Y¿¢W[ïÉѬ˜4€kê 0EO aVß´7AòÏ"—ÈÛ~œa\£1‡èýà в<{bvZ bÊOîEÑSBT¢X]Ÿü;Ä4C?£VÅ·‡¨ÚFÕØbˆf<¥D׃WŸ¨ˆ|݈¢'ƒXìXyYž… 'ô3E³'yèY Qî-N}üÎë å'^A¹¢è) 
ð©ÏžiÜÅDQžgóŠZÎW¢iè)›!Z^åÁÇ>œsgÖé ¼(z"þ!ý;ðÍú ˇ¥¥90°¢<&éhEù÷ŠNÿ·Qôt.9DéœTùìwJ!J[Ñê…X}Äá4½=†Ã·‹c(Š6³êðî{¯~‡’kÚ¹ E5ÐæÏ¼u…ÅqgÒ"ŠNë=W® ÑÐÍd­Vô³mM[¼ˆß'¿tÜå˜GkõÝOÀ0;=ú ŠžR‰þq_)z‘çc¯ú¾ÑÏ Ji€Ì(#a{s$ÜnÿmÞDÑÓA*Q}»3jsû‚\ÖéjLŒ°F3À±¢zL Ù–ÏØ¬© ê5¥!zÿþ芢§ƒT¢3¼!¦¾hª±¦ðÕÃÃ{­ž^-ÁF-,ü`™¢Áö Õ7º3Æø-Q¡ý EÑÓA*Q gß<„„o²Žú²…>×´3®yˆ*w´ *À=3åþж(z:Èž(€Î~E9&¡ž$m‹ÿi ð_Ó4ùÁ&­¶¼ÙkãRÝU$¥_Ñgê%‡N3=ÒT\¸oÛ„°÷ŒCÔ”(Úh„Q)D)ú¶F¾µÁçåœW•n!D/¯”)Õœ¦ÿtý‘Yˆ2¥&Åž(Úd>â§Ù2vìΨëknq­œ2u8Dg€ú1[îéa•_{}¿¾–Ö…Æ#ŒT!D·ò-vö¸ÞÖD½aÕø…¬JnoMÂöf-bLC”ÑPl«¢f#Š ˆJÇ'WhQ ŸÞ@}_\j­aœí¸]D¶Õ¨!!z\OwDѦ#ŒÆyˆ&lÐ>lc±Ž~V‰J'"+Bd3DÝᦠj:¢¨\p%š(»[QÑ:ü4Cà6L°Õ«í¤^ØÊ+ÑŽ(Ú|„¸¢¤÷Åß®£7¿S-‰Q ²‡è;­ÞºªŸT%*Š ÷ÅÆ¢`´øz÷A3?)ÚDIŒj¸d W¢¾æ^†£‡§T‰Š¢ÂèZêð;“®øÛôðÇÊñ Ü0¨4µÐe,J7GÀ¹A— 8¡JTpm9=`E†Cô¨ºöÓïòðPÑÌÆ`z}¥Þ ¯ˆæGƧC.l›(*ŒŠý6{Ó‰k¸oë.Js ·$éEßÖ¸N¸Åó­ý®ÉQ·L<„ ¢Ãý‹E‹ÜõêO5=t™¡`q|K˜•¡b3áoEÑSA!Þ·áÉxànÄ|QZÝ­ØÚŽ¢!—_³Ñ1t*ˆ¢B÷€ ÏGƒ®ûYâ®zÕ»£NîV—fªQ•δ=#á/DÑÓA­Q¥‚ÈÊÖÕí NžXšã%ªt9¹E >á1EO!è¢4Ù¦h˜ °®{Ñå›6æ“ûtº 5$ W¯…ˆ¢'„ðÍÁ!Js`èRëzp5{o·F·ÉŒp‘˜;ãîu,ŠžÂá!šÈ —øË~eQè11}dÈhÎá (#á;¢è)!¼ˆ¹khᅧ)bå|8µ{3å=sf<'Át î[Qô”ç=çâÌX¹ªú NÅmÛ†„Å+).è|E…E3aWÂê8+M-ÒS‚jþØbQôŒVj?<œ¾~iµ¨ï)Òy#Òòî{›)*€Î ‰Q*2w\ªgÃ^®ªStŠ„û4D%€[‹)*€Î !@›J¬T¼K襽ºrxS¨D=`l¾\Ô!Æ_â•(z.£G¦…»Œ¡ÃÖ\P%Ñ_ºÅå\_¤Jð´DÑ3AböÝÚÏÝ òšÄå5lªÚ cQôLFqÙÂ]ŠNŠç×ÖD5Þ{ŽŒææ;/«âi¹ЦEÏ!(Y6Ý9µ6Ä‚EÞÚË}!JiäñqQ—géc(ŠžÂþ©µ>:|É7ÏÊÞ¢6F¤L5 ¾{Xö¶™zËãÎ QTлTê^.òºþJÑi1DéÊB‹QðdKˆ(zŽHˆ&ÜÕ9&¶ùYOÒ¨ú$Ëd˜Ä,[OÆüJ?ú©fY|GŽ+Šž BR‰~ª¡¨÷ÛÕ›D¨:Dɇæå¶]3ž—M óÅ告ÏKQA*Ѩ~ßñ×ÒðÜÝÏ®_UmÎðV…Ö hLfÌåoÒ×ç^á(«(zFHˆ&¥õsÆ8ƒ$D·ô*bˆ¹Št‰ü?Ñ3¬Í@à-Sâ?áœd9×G­LÓU& mõ*U Ìíç¶ Ï ´ùï ŽKs^Öšœ›¢‚„(Ñ®¾õçnÞå3¬9†â£Åçù7$3tâû{¨x,ŠÛ;票 {¢‡Ü«°îqD¿U ÑÄdãõ•xŸøîz$ÎKQABtºOQQŠ*×w8Dùëæ‹E|KhûœBT‚˜ãˆ“qˆÖÞRØrˆò×míæÝž³ QQTøçÀypËú­ Q"Løî“í‘!³ìÇÚœYˆŠ¢Â£8Ž|`o{Ýho*ºBT!6o\ŸgˆŠ¢¢èÃspõê%Õà)Ø[ZNš—+Qm£Àâ,CTþù÷?J!úêoÇÆ‹zðö—–}…r%j>½oƒÞy†¨(*ø¿ àpˆªmçž[gß!›œ>/çêŽù›îíY†¨(*øþŨÍj-¶;µJ4~·´Ë[¨±…­[õÕ9†¨(*<ü!pr³œôƒ³ûÒîý1:,„¨žßhœeˆŠ¢ÂmbÑt£Ö>}HzõárˆÚÂgßÏ)DEQáýþï;\‰¦Šº;íÛo“Ë!j‹ àœaˆŠ¢Bôw¢û¥ƒâ«?,‡¨ŸÌØ5ct¦!*Š ÿÓó¬M¸õö-h?~»¢PdˆœgˆŠ¢¢è_~­#QÒ;}êÒ|²5=_ˆRׯý }¨3 QQTø…œ wwÕþâ#®fŸ™jêÕY†¨(*ü+m !êñGBÀÉ?Qÿ‚Ìøl¹žeˆŠ¢ÂïpˆÎ¸—þ0¢‡¸fvX¼’t¦È˜Ú®¢èß8ràå‚C´EÁ3ÞTŸ+Í×b^ 
6¢.DùÇÿEE#ÞqÁG¹äи¯RÔ)_ ¬;¶C””(ú7Ž,ê.råzÄÖûëŠ]§œ§Úx•Ç_pˆŠ¢ç…ðsÐÎC4æ“}DØŠ¸êVý£8רM“!ú¨]£+ŠþM#üÇ—C4•Ï¥}ŒÒø Óc<¨›1=# ‘„,ÚdJˆê¥+,DÑ¿qdQ7*‡¨à¢]½H>t(aÊUi¤q7ðUòÐQÀ!j8P· —þ¶E…ÿ›È-‡(–Xì¯D×a£èfµ幺ÙU1a›Ì Ïõ&}QTø»ßÒ£0Ñäb†h7€"Jb”·UKïÇ~by’Ç+niP™±(zÑ¿GK:4\"ïbßðÌ ¢$Fóv‡PìhqÖôûdƒTÂÇKQô<‚q^Î¥gkJí«ç XS£AqZ6†,Üí¢ÙB»TâîEѳ@ˆþ÷¿¦ão™Æœ ½\œ[t8ý'Ï ô‚ŽáÅ¢bÛç E…é¶ÈÌñšjáSkz¡Aa|IÆ\ƒÛun9:‚[*Š¢gðÚ ùðùC~ûX‹v€t7T ƒ>™I¸†i€lpÀEÏáßþ`ëpçò¾uø@;÷M}2>ÖT}©gIQ tèläÀËz« ÆD¤Ñ£z¢\Ñhqˆñ5)º²w7ê9„¦¨•h†»à{6oŸ¢¤1äJ´HØÝŠÓ#¢åÐÚݨg£¨ ^þç˜H ¢ÈÃížù >/¦†ª:DiŠØðŠ„(+*?ó­õô-ä:¾¢GÀ^r%Z¦¯F•al³œ¢‚(Òééqiú^·ª‰‰á-0Õx”t_¬‡¨Ÿ‡¢‚ð—¿$çRJGG54 Ü}7¼p%Z&Ðþ¿+nâµU‰^âûóQT)–æ/\5oª! jC”ù‘JøPcÛ•(p{Š ¢èÃ/aùè(²U1•’ö#Ÿ¸=±å=Q‹óPTk»§qQ Qšæ½.±~ÚÚóêá=/ç:»c7Dƒ3RTk»'Pƒbˆ†y%:n+Ö‚°æJô&\k•(_zŠ ‚ÿ¿ÿàÁqË!ŠAÍûd µ>0DmÞšð0@~Å>Eááÿ4°,†(t§ö}ò!¦Ó„Í2Øáuª(>Eá;¢OoPªD'ìJ³5è#A}Š Âýý€¢G•h£ÓÚ äJÔ~ˆîW½¨çßh`}.Š r'ðäÑrîDÑä }x>8À¬ç6b4.¾bûÎy(*ˆ¢nvk=/çw†(W¢¿ ÖûCtÕ#S´*/çÎñY(*ˆ¢ÿßÏ ¼'z`ˆÎ( Þ¢Á¸ 0||Š\£sŠ ÂÏ .†èÎJ4Î+Ñ(ÉH_·÷W¢Tle ¿bÐ:EQ”V€:4D_n¿MÙûñ ë]!ÊW?˜ñbQ QZ¡wŠ Â¿ýÿ‡âC”úHÿÙ<ûzûB4²Ñm0,7é3RT/ V¢à}—|Ø€ž2(>ï Pd̰¢άDÑÙ!!ÊçF# Kz‘ø¼¦'té€[xœ1úÁTá§8E9ðò3­ Qò5ÜíwÛuÖI•,Wùó&®¿úÑJ‹Ñ‚rëݳQT~G4åþù=©×Gz|m@IŒNЩÅMèÈ Í¿9.qN­ ‚ðçiV Q<<Ô‡‰wÝd»²nûcR QEÓ@Ï]ND“Û3QTþf€Ë©—õšjhs¢…*¬Y[½„˳Ó"Õ? 
QoÎCQAø_¿ÿäñúëP5¢IºZ×(êcP.lûèZºoô@úHgp&Š rm·FkPJ½=kF|~ Cªb2,=Ž®à!a§\‰N€îY(*Áx,[œz{º`7™¢¤—î*ÂÌïqˆÚº{éŽ+Ñ´}Â9E¹¶ûw–wn1õ6è¢h‡hTÕ&?(7iK:ùyˆF†g¢¨ ü®DP´Ïrè×¼~[æm±eÏ·2#åWDa¢À¡³RTE'ÅÔkS-®Tg.¡jRIÿNóã&VºjÃÞç(…èçƒ3QTk»é¾˜zp±C•r'B„*ýG-{1™£‘÷oh@Å碨 ×vßܨîó9|wM ¼äF„ù/ƒÀݵœ3QTE£CÕyヿ|[-r‡×x^“ t¶_»É¢ÏDQA®í•JÇîž4S"rÓˆ„;¨ŒÑ1¬¢W:'íóPT~²¨ÛÆÓà< J9¯Z`ºäJÔḢ49Eáï>ïÕ•ŽSàž*àˆ¬(Dù%–C”¦»ß8³•®Ùýh!î®à–CôœäNàhwˆ2]Qžb¿“OÕ:†÷£y€6qˆž‹¢‚(úÜ¥9œ!Êü‚C”Ý×5¼Í÷ÜS1Dý³QTk»û@Ì!Šë멎i)D£CÇ ƒ,T ÑÉâ,ÿwä Ñ¥ÞqËödQ Ñ«gÃæ×_Æ™V1‡SªD}uŠ ‚ÿç_¬€E±ýp[÷m}Ã!ÊC4«˜À¾"ø(Â[bÊ•¨·8 EáÇß÷¸T‰¾¨†>8DCþXpb³¡`K*1ãåÜ•:Eáß#ÝåÝ· TÑÅÎÉžoÞA§ K¬x9Wcq.Š rm÷ãå\úU¢u﹞%Âm¿¬³jEçˆËŠö,Ý«Ï˹ïÏ&E¹¶{À¡ƒÉîU Õ†-óœJôa£S ¬9D™sPTk»+Ctÿ™P^d݃ãõ=,8DÏIQAøm>*˜4P˜‘2ú_X¨F•Ë!zVŠ rÃËߣEG¤(i8©õ…¤ó¬E¦\Ž9DÏKQAî]“¢¢CÔÇËCÎdÇd ‡è™)*ÈÍúQ—gÅo­Ú%š÷´“+«cŒnÏOQA¼üÏ‘KÓçÅ%ŸçPïtÚ±wtt~Š Âï‚Pt\”íÄ^W}x}†Š ÂÏôD\îSÄ vЀ{†Š ¿ý™öQ'å ÜÃÆƒÝ“)쯊¢‚0ÙZù÷n:g¬÷ÍwƒCV‹6±¹¢Qt'‚j‰ÞÁ%¢Klù~ÿøk #ЫUt‹¢‚P\XÆùKïƒêÉW—BÄÆùíÔþH_‹¢‚ðÙRÝÅùÕ-<2¥~µˆ¿dBt拓Š QTx`n ö¬EÊ8F¥Ë}?]Çò½(ÊBéê–iýY™þö+¿ÞP0¶æà¸¼=+Š BÍöKýÞ§‡.=ƒýÑgHhW\Ù&Š W¢¬híjѨ5Y&€7ß`‘®`¹¢h ‚„è¼VÑXLWPöýÑéÅOœ'³{EQA˜‹å_¿vþBÜhåÒe¶=²2êr‰À}ù5a.Š ¢èaרAÅD:›€;QdÊ37q?¯‰•(úA˜ñ:-—_ºÅ…£c2gÆkÅ‘(Z‰ xp~Œß ¹°Ùgò{îL­DCu›ê)úÁÙ™è Š ‚Ï!ú.‚mDˆBˆzÀB5A&@;÷ €rÉ Í!:Û>ñ­(z4‚àëü*CçáOÆm€a)DÛïo êQQTtšyóôªÑ9bã'º¥ë»/¡(z$‚ Ó‰š«¬&õÌc”C4yÖŽ(z$‚ ·-z>²Å×+E†h—C4ýàŠ¢Ç!W[3'2³ÆdFØáM†¢èq øâG"û47ß'¹ãM€É#EQAúÿùȧÓæÝ¢¢¨1‚ðì¿»… iŽ™¸Å‹¢†B¾5 ™òâ¾¢} EM„>°ÎÛ2FkÑ)²„(*Èpú>+©Œ^±ÅÐQT<öR[JcÑ…(jˆ Š^ЖÈÎù±‡¨"CDQA픺àdŠnsˆvDQ3aƒa.+†©˜ »ÖBT„~¦hÀ%Ú˜_õ B4EEBÙ—„ÞøÂô‘‹rˆzPcQô8!JõÓЛ[˜9—B´®(z‚ [Yˆ&…6|r‹!ª¥»¢èq•¤§SÚ©P-2eéú\‰Š|½EB|Üù@l+D)T…QcD“®(z‚p‰ëÒnÓí½¿yC†h¯i‹OD>bQô8ÁÜ'ñ‡-†b@EÐEC¦ïuŸ/í|MÀ|¦n«¢çè(DQAð8ü¶«±C;Û­þCœU¼,ÅòV= AÐ@‹gV¡iÇ’ã¦óØ^ DÑE8Dinž¢¶ò*÷%õYѾÑÓEQAB”H+óœ[œàž|@‰¢v$D‰ü˜Œ »Dô-Ú—@ÿ¸¢¨‚„¨Å&®?Cqþ‚¯ïéãEa>䵆†“¿Û*:QT|õ“õ% —æ&ÐEM>2fÿîµ0 ÑÏ¢¨‚\«ß#»xè•æ/ô•+Љ Øß ¡«m-z™U¢Sµ(z4‚Ä(Y&b¢)W¢+`(Ї ôA¶Ù@ý‘^ðr®º¢èq‚ÙÆ–<!€¤èñBHÖ‰~Ѐ"óíQQTüÙ‡4pË‹Æ QôhÁ%û¢¬¨ !pÁ!:lœ¢‚ Š.J!ê7HQA"àu©½j7HQA6€r9DÉ×ÊmŽ¢‚ ütý¥Jô jÜ0EAàåÜ à4HQAÊ{¢è5HQAþK1Dgâ)*Â[èÎõ8D¡¨ ¡B)D_=¤¨ š+Q ŽÛEAË•hôp𣍠¥Jô6FQAB Ë˹ºÍQT=,/ç†PÍQT„°Å!Ê76EQA–¢©¢ns!*…(õj¢‚ \C”t³jQABgÄ!:CÃVtAÐàݯ¥¨ 
!€q>-n³A£Ë!Ú£f)*Bˆ‡è¸iŠ ‚ ;yˆ¶©iŠ ‚v8D›§¨ šC´yŠ ‚Æ¢STÑ&**B´àmž¢‚ è?iÑ&**Òb„qSA5XQA6WQA4¨¹Š ‚6ZQA´Û`EAMVT„[2ÅœÿÈ){…&±Ö¤IEND®B`‚sleef-3.3.1/doc/html/dft.xhtml000066400000000000000000000452521333715643700162040ustar00rootroot00000000000000 SLEEF Documentation

SLEEF Documentation - DFT library reference

Table of contents

Tutorial

I now explain how to use this DFT library by referring to an example source code shown below. This source code is included in the distribution package under src/dft-tester directory.

// gcc tutorial.c -lsleef -lsleefdft -lm
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <complex.h>

#include "sleef.h"
#include "sleefdft.h"

#define THRES 1e-4

typedef double complex cmpl;

cmpl omega(double n, double kn) { // returns exp(-2*pi*i*kn/n), the complex factor summed by forward()
  return cexp((-2 * M_PI * _Complex_I / n) * kn);
}

void forward(cmpl *ts, cmpl *fs, int len) { // naive reference transform: fs[f] = sum over t of ts[t] * omega(len, t*f)
  for (int f = 0; f < len; ++f) {
    fs[f] = 0;
    for (int t = 0; t < len; ++t) fs[f] = fs[f] + ts[t] * omega(len, t * f);
  }
}

int main(int argc, char **argv) { // checks the library's forward transform against the naive forward()
  int n = 256;
  if (argc == 2) n = 1 << atoi(argv[1]); // the optional argument is log2 of the transform size

  SleefDFT_setPlanFilePath("plan.txt", NULL, SLEEF_PLAN_AUTOMATIC);

  double *sx = (double *)Sleef_malloc(n*2 * sizeof(double)); // input: n interleaved (real, imaginary) pairs
  double *sy = (double *)Sleef_malloc(n*2 * sizeof(double)); // output, same layout

  struct SleefDFT *p = SleefDFT_double_init1d(n, sx, sy, SLEEF_MODE_FORWARD);

  if (p == NULL) {
    printf("SleefDFT initialization failed\n");
    exit(-1);
  }

  cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); // the same input as complex values, for the naive transform
  cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // result of the naive transform

  for(int i=0;i<n;i++) {
    ts[i] =
      (2.0 * (rand() / (double)RAND_MAX) - 1) * 1.0 +
      (2.0 * (rand() / (double)RAND_MAX) - 1) * _Complex_I; // real and imaginary parts are each in [-1, 1]

    sx[(i*2+0)] = creal(ts[i]);
    sx[(i*2+1)] = cimag(ts[i]);
  }

  forward(ts, fs, n);

  SleefDFT_double_execute(p, NULL, NULL); // NOTE(review): presumably NULL selects the sx/sy given to init1d - confirm

  int success = 1;

  for(int i=0;i<n;i++) {
    if ((fabs(sy[(i*2+0)] - creal(fs[i])) > THRES) ||
        (fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) {
      success = 0; // any component differing by more than THRES fails the comparison
    }
  }

  printf("%s\n", success ? "OK" : "NG");

  free(fs); free(ts);
  Sleef_free(sy); Sleef_free(sx);

  SleefDFT_dispose(p);

  exit(success ? EXIT_SUCCESS : EXIT_FAILURE); // exit status 0 when the results match, non-zero otherwise
}

Fig. 4.1: Test code for DFT subroutines

As shown in the first line, you can compile the source code with the following command, after you install the library.

$ gcc tutorial.c -lsleef -lsleefdft -lm

This program takes one integer argument n. It executes a forward complex transform of size 2^n using both a naive transform and the library. If the two results match, it prints OK.

For the first execution, this program takes a few seconds to finish. This is because the library measures computation speed with many different configurations to find the best execution plan. The best plan is saved to "plan.txt", as specified in line 30. Later executions will finish instantly as the library reads the plan from this file. Instead of specifying the file name in the program, the file can be specified by the SLEEFDFTPLAN environment variable. Instead of constructing or loading a plan, the library can estimate a modestly good configuration, if the SLEEF_MODE_ESTIMATE flag is specified at line 35.

This library executes transforms using the most suitable SIMD instructions available on the computer, in addition to multi-threading. In order to make the computation efficient, the library requires the input and output arrays to be aligned to some boundaries so that the data can be accessed with SIMD instructions. By using Sleef_malloc, as seen in lines 32 and 33, this alignment is ensured. Memory allocated with Sleef_malloc has to be freed with Sleef_free, as seen in line 70. When a transform is executed, you need to pass the pointer returned by Sleef_malloc. You can also allocate an aligned memory region yourself, and pass the pointer to the library.

The real and imaginary parts of the k-th number are stored in the (2k)-th and (2k+1)-th elements of the input and output arrays, respectively. At line 56, the transform is executed by the library. You can specify the same array as the input and output.

Under src/dft-tester directory, there are other examples showing how to execute transforms in a way that you get equivalent results to other libraries.

Function reference

Sleef_malloc - allocate aligned memory

Synopsis

#include <stdlib.h>
#include <sleef.h>

void * Sleef_malloc(size_t z);

Link with -lsleef.

Description

Sleef_malloc allocates z bytes of aligned memory, and returns a pointer to that region. The returned pointer points to an address that can be accessed by all SIMD load and store instructions available on that computer. Memory regions allocated by Sleef_malloc have to be freed with Sleef_free.


Sleef_free - free memory allocated by Sleef_malloc

Synopsis

#include <stdlib.h>
#include <sleef.h>

void Sleef_free(void *ptr);

Link with -lsleef.

Description

A memory region pointed by ptr that is allocated by Sleef_malloc can be freed with Sleef_free.


SleefDFT_setPlanFilePath - set the file path for storing execution plans

Synopsis

#include <stdint.h>
#include <sleefdft.h>

void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode);

Link with -lsleefdft -lsleef.

Description

The file name for storing execution plans can be specified by this function. If NULL is specified as path, the file name is read from the SLEEFDFTPLAN environment variable. A string for identifying the system micro architecture can also be given. The library will automatically detect the micro architecture if NULL is given as arch. Management options for the plan file can be specified by the mode parameter, as shown below.

Table 4.2: Mode flags for SleefDFT_setPlanFilePath
Flag Meaning
SLEEF_PLAN_AUTOMATIC Execution plans are automatically loaded and saved. Plans are generated if they do not exist.
SLEEF_PLAN_READONLY Execution plans are automatically loaded, but not saved.
SLEEF_PLAN_RESET Existing execution plans are reset and constructed from the beginning.

SleefDFT_double_init1d, SleefDFT_float_init1d, SleefDFT_longdouble_init1d - initialize the tables for 1D transform

Synopsis

#include <stdint.h>
#include <sleefdft.h>

struct SleefDFT * SleefDFT_double_init1d(uint32_t n, const double *in, double *out, uint64_t mode);
struct SleefDFT * SleefDFT_float_init1d(uint32_t n, const float *in, float *out, uint64_t mode);
struct SleefDFT * SleefDFT_longdouble_init1d(uint32_t n, const long double *in, long double *out, uint64_t mode);

Link with -lsleefdft -lsleef.

Description

These functions generate and initialize the tables that are used for 1D transforms, and return a pointer to them. The size of the transform is specified by n. Currently, only power-of-two sizes can be specified. The list of the flags that can be passed to mode is shown below.

Table 4.3: Mode flags for SleefDFT_double_init
Flag Meaning
SLEEF_MODE_FORWARD Tables are initialized for forward transforms.
SLEEF_MODE_BACKWARD Tables are initialized for backward transforms.
SLEEF_MODE_COMPLEX Tables are initialized for complex transforms.
SLEEF_MODE_REAL Tables are initialized for real transforms.
SLEEF_MODE_ALT Tables are initialized for alternative real transforms.
SLEEF_MODE_ESTIMATE Execution plans are estimated.
SLEEF_MODE_MEASURE Execution plans are measured when they are needed.
SLEEF_MODE_VERBOSE Messages are displayed.
SLEEF_MODE_NO_MT Multithreading will be disabled in the computation for transforms.

Return value

These functions return a pointer to the data that is used for 1D DFT computation, or NULL if an error occurred.


SleefDFT_double_init2d, SleefDFT_float_init2d, SleefDFT_longdouble_init2d - initialize the tables for 2D transform

Synopsis

#include <stdint.h>
#include <sleefdft.h>

struct SleefDFT * SleefDFT_double_init2d(uint32_t n, uint32_t m, const double *in, double *out, uint64_t mode);
struct SleefDFT * SleefDFT_float_init2d(uint32_t n, uint32_t m, const float *in, float *out, uint64_t mode);
struct SleefDFT * SleefDFT_longdouble_init2d(uint32_t n, uint32_t m, const long double *in, long double *out, uint64_t mode);

Link with -lsleefdft -lsleef.

Description

These functions generate and initialize the tables that are used for 2D transforms, and return the pointer. The size of the transform can be specified by n and m. Currently, only power-of-two sizes can be specified. The list of the flags that can be passed to mode is the same as for the 1D functions (see Table 4.3).

Return value

These functions return a pointer to the data that is used for 2D DFT computation, or NULL if an error occurred.


SleefDFT_double_execute, SleefDFT_float_execute, SleefDFT_longdouble_execute - execute a transform

Synopsis

#include <stdint.h>
#include <sleefdft.h>

void SleefDFT_double_execute(struct SleefDFT *ptr, const double *in, double *out);
void SleefDFT_float_execute(struct SleefDFT *ptr, const float *in, float *out);
void SleefDFT_longdouble_execute(struct SleefDFT *ptr, const long double *in, long double *out);

Link with -lsleefdft -lsleef.

Description

ptr is a pointer to the plan. in and out must be pointers returned from Sleef_malloc function. You can specify the same pointer to in and out.


SleefDFT_dispose - dispose the tables for transforms

Synopsis

#include <stdint.h>
#include <sleefdft.h>

void SleefDFT_dispose(struct SleefDFT *ptr);

Link with -lsleefdft -lsleef.

Description

This function frees a plan returned by SleefDFT_double_init1d, SleefDFT_float_init1d, SleefDFT_longdouble_init1d, SleefDFT_double_init2d, SleefDFT_float_init2d, or SleefDFT_longdouble_init2d functions.

sleef-3.3.1/doc/html/favicon.png000066400000000000000000000007571333715643700165050ustar00rootroot00000000000000‰PNG  IHDR@@XGlíPLTEÿÿÿy=¦¶]ÿ xįÆÛÿÆÿ¯ŸÀîûª pHYs5Ó5Ó !Cw}IDATx^ÕÒ±Žƒ0 `s­am†î'žà$†®Â*²wÊ­HHÍëŸm‚Š9¤[NýS)Ÿ0q ï’ÞZ‡ªH€S†éÜ[Û_¡ ”oMÊ uŒ®Uw˜26®Ž–!u­€€¾ŠFV@¯‚CGT» èXŸ¡ªË<&~n‚£%¡‚ÃÀRÚ5ˆ‘E¼ ƒ,ܺŘ`O¯Ð¿!…ÄÖ-ÐIÕLà È0±}öÈ ¼'ÀÿÚx 3¸W2hÒcqÍ2ƒJ€u²˜î  ap@û–ø#„#ˆ<çdžA›&]†”¼ö8ÞÃÿMŠƒ[²Þ˜³÷ž’ ŸTa™Âœ¤òЦ#tô¡‚.TA‘¨”î €§ n( Ö@·Ðdp[Jœ58ûnÞ×{߀õŸnQìÍÝà§ÀÑA· üpt:ãdwe'ýöÚ›ií?_…ŸÈÕÁ[å'‡À—IEND®B`‚sleef-3.3.1/doc/html/hellox86.c000066400000000000000000000007021333715643700161550ustar00rootroot00000000000000#include #if defined(_MSC_VER) #include #else #include #endif #include int main(int argc, char **argv) { double a[] = {2, 10}; double b[] = {3, 20}; __m128d va, vb, vc; va = _mm_loadu_pd(a); vb = _mm_loadu_pd(b); vc = Sleef_powd2_u10(va, vb); double c[2]; _mm_storeu_pd(c, vc); printf("pow(%g, %g) = %g\n", a[0], b[0], c[0]); printf("pow(%g, %g) = %g\n", a[1], b[1], c[1]); } sleef-3.3.1/doc/html/index.xhtml000066400000000000000000000473121333715643700165350ustar00rootroot00000000000000 SLEEF Vectorized Math Library

SLEEF Vectorized Math Librarylogo

Table of contents

Overview

SLEEF stands for SIMD Library for Evaluating Elementary Functions. It implements vectorized versions of all C99 real floating point math functions. It can utilize SIMD instructions of modern processors. SLEEF is designed to fully utilize SIMD computation by reducing the use of conditional branches and scatter/gather memory access. Our benchmarks show that the performance of SLEEF is comparable to that of the best commercial library. Unlike vendor-tuned libraries, SLEEF is portable : it can be easily ported to other architectures by writing a helper file, which is a thin abstraction layer of SIMD intrinsics. SLEEF is also designed to work with various operating systems and compilers.

The library contains subroutines for all C99 real FP math functions in double precision and single precision. Different accuracy of the results can be chosen for a subset of the elementary functions; for this subset there are versions with up to 1 ulp error (which is the maximum error, not the average) and versions with a few ulp error. Obviously, less accurate versions are faster. For non-finite inputs and outputs, the library should return the same results as libm as specified in the C99 standard. The library is rigorously tested to check that the evaluation error is within the designed limit. The library is tested against high-precision evaluation using the libmpfr library. In particular, we carefully checked the error of the trigonometric functions when the arguments are close to an integral multiple of π/2.

SLEEF also includes subroutines for discrete Fourier transform (DFT). These subroutines are fully vectorized, heavily unrolled, and parallelized so that modern SIMD instructions and multiple cores can be utilized for efficient computation. It has an API similar to that of FFTW for easy migration, and is distributed under BSL, which is a permissive open source license. The subroutines can utilize long vectors up to 2048 bits, and even longer vectors can be utilized by a small modification. The helper files for abstracting SIMD intrinsics are shared with SLEEF libm, and thus it is easy to port DFT subroutines to other architectures. Preliminary results of benchmark are now available.

Supported environments

This library currently supports several SIMD architectures :



In addition to the SIMD implementation, a Pure C (scalar) version is provided. For x86 architecture, the library provides dispatchers that automatically choose the best subroutines for the computer on which the library is run. The supported combinations of the architecture, operating system and compiler are shown in Table 1.1.

Table 1.1: Environment support matrix
GCC Clang Intel Compiler MSVC
x86 (64bit), Linux Supported Supported Supported(*1) N/A
x86 (32bit), Linux Supported(*2) Supported(*2) N/A
AArch64, Linux Supported Supported N/A N/A
AArch32, Linux Supported(*3) Supported(*3) N/A N/A
PowerPC, Linux Supported(*4) N/A N/A
x86 (64bit), OS X Supported Supported N/A
x86 (64bit), Windows Supported(Cygwin)(*5) Supported(Cygwin)(*5) Supported

The supported compiler versions are as follows.


  • GCC : version 5 and later
  • Clang : version 3.9 and later
  • Intel Compiler : ICC version 17
  • MSVC : Visual Studio 2017

*1 FMA4 is not supported by Intel Compiler.

*2 SSE2 is required to run the scalar functions on 32-bit x86 architecture. x87 is not supported.

*3 NEON has only single precision support. The computation results are not in full accuracy since NEON is not IEEE 754-compliant.

*4 Clang-5.0 and later are supported.

*5 AVX functions are not supported for Cygwin, since AVX is not supported by Cygwin ABI.

All functions in the library are thread safe unless otherwise noted.

Credit

Partner institutes and corporations

 
NAIST logo

The Mobile Computing Lab at Division of Information Science of Nara Institute of Science and Technology participates through Naoki Shibata.

 
 
 
ARM logo

As the leading IP company in semiconductors design, ARM participates through Francesco Petrogalli.

 
 
 
Unity Technologies logo

As the leading company in developing a video game engine, Unity Technologies participates through Alexandre Mutel.

 

License

SLEEF is distributed under Boost Software License Version 1.0.

open source logo   Boost Software License is OSI-certified. See this page for more information about Boost Software License.

History

3.3.1 (Released on Aug 21, 2018)

  • i386 build problem is fixed
  • FreeBSD support is added
  • Trigonometric functions now evaluate correctly with full FP domain. (PR #210)

3.3 (Released on July 6, 2018)

  • AArch64 SVE target support is added (PR #180, #182)
  • DFT is now faster (PR #186)
  • 3.5-ULP hyperbolic functions are added (PR #192)
  • PowerPC VSX target support is added (PR #195)
  • Modified Payne-Hanek argument reduction is added to the trigonometric functions in libsleef (PR #197)

3.2 (Released on Feb 26, 2018)

  • The whole build system of the project migrated from makefiles to cmake. The makefile build system is now removed.
  • GNUABI version of the library with compatibility tests is added.
  • Benchmarks that compare `libsleef` vs `SVML` on X86 Linux are available in the project tree under src/libm-benchmarks directory.
  • Extensive upstream testing via Travis CI and Appveyor
  • log2 is added.
  • The library can be compiled to an LLVM bitcode object
  • Added masked interface to the library to support AVX512F masked vectorization.
  • Use native instructions if available for `sqrt`.
  • Removed `libm` dependency.
  • fmod(FP remainder), asin, acos, log, pow, log10, exp2, exp10 and log1p functions are now faster.
  • Fixed a bug that was making the error of sinpi, cospi, sincospi, and tgamma functions larger than the specifications on very rare occasions.
  • Fixed a bug that was preventing the dispatcher from choosing the FMA4 implementation.

3.1 (Released on July 19, 2017)

  • Added AArch64 support
  • Implemented the remaining C99 math functions : lgamma, tgamma, erf, erfc, fabs, copysign, fmax, fmin, fdim, trunc, floor, ceil, round, rint, modf, ldexp, nextafter, frexp, hypot, and fmod.
  • Added dispatcher for x86 functions
  • Improved reduction of trigonometric functions
  • Added support for 32-bit x86, Cygwin, etc.
  • Improved tester
  • Etc.

3.0 (Released on Feb. 7, 2017)

  • New API is defined
  • Functions for DFT are added
  • sincospi functions are added
  • gencoef now supports single, extended and quad precision in addition to double precision
  • Linux, Windows and Mac OS X are supported
  • GCC, Clang, Intel Compiler, Microsoft Visual C++ are supported
  • The library can be compiled as DLLs
  • Files needed for creating a debian package are now included

2.120 (Released on Jan. 30, 2017)

  • Relicensed to Boost Software License Version 1.0

2.110 (Released on Dec. 11, 2016)

  • The valid range of argument is extended for trig functions
  • Specification of each function regarding the domain and accuracy is added
  • A coefficient generation tool is added
  • New testing tools are introduced
  • Following functions returned incorrect values when the argument is very large or small : exp, pow, asinh, acosh
  • SIMD xsin and xcos returned values more than 1 when FMA is enabled
  • Pure C cbrt returned incorrect values when the argument is negative
  • tan_u1 returned values with more than 1 ulp of error on rare occasions
  • Removed support for the Java language (because no one seems to be using it)
sleef-3.3.1/doc/html/misc.xhtml000066400000000000000000000203301333715643700163500ustar00rootroot00000000000000 SLEEF Documentation

SLEEF Documentation - Other tools included in the package

Table of contents

Libm tester

SLEEF libm has two kinds of testers, and each kind of testers has its own role.

The first kind of testers consists of a tester and an IUT (which stands for Implementation Under Test.) Those two are built as separate executables, and communicate with each other using a pipe. The role for this tester is to perform a perfunctory set of tests to check if the build is correct. It also performs regression tests. Since the tester executable and the iut executable are separated, the iut can be implemented with an exotic language. It is also possible to perform a test over the network.

The second kind of testers are designed to run continuously. It repeats randomly generating arguments for each function, and comparing the results of each function to the results calculated with the corresponding function in libmpfr. This tester is expected to find bugs if it is run for a sufficiently long time.

DFT tester

SLEEF DFT has three kinds of testers. The first ones, named naivetest, compare the results computed by SLEEF DFT with that from a naive DFT implementation. These testers cannot be built with MSVC since complex data types are not supported. The second testers, named fftwtest, compare the results of computation between SLEEF DFT and FFTW. Rigorous testing is possible with the second testers, but obviously it requires FFTW to run. The third testers, named roundtriptest, execute a forward transform followed by a backward transform. Then, they compare the results with the original data. An advantage of the third testers is that they do not require an external library and they run on all environments, but there could be many cases in which this testing does not find flaws. The third testers are used only if FFTW is not available.

Gencoef

Gencoef is a small tool for generating the coefficients for polynomial approximation used in the kernels.

In order to change the configurations, please edit gencoefdp.c. In the beginning of the file, specifications of the parameters for generating coefficients are listed. Enable one of them by changing #if. Then, run make to compile the source code. Run the gencoef, and it will show the generated coefficients in a few minutes. It may take longer time depending on the settings.

There are two phases of the program. The first phase is the regression for minimizing the maximum relative error. This problem can be reduced to a linear programming problem, and the Simplex method is used in this implementation. This requires multi-precision calculation, and the implementation uses the MPFR library to do this. In this phase, it uses only a small number of values (specified by macro S, usually less than 100) within the input domain of the kernel function to approximate the function. The function to approximate is given by FRFUNC function. Specifying higher values for S does not always give better results.

The second phase is to optimize the coefficients so that it gives good accuracy with double precision calculation. In this phase, it checks 10000 points (specified by macro Q) within the specified argument range to see if the polynomial gives good error bounds. In some cases, the last few terms have to be calculated in higher precision in order to achieve 1 ULP or less overall accuracy, and this implementation can take care of that. The L parameter specifies the number of high precision coefficients.

In some cases, it is desirable to fix the last few coefficients to values like 1 or 0.5. This can be specified if you define FIXCOEF0 macro.

Finding a set of good parameters is not a straightforward process.

Benchmarking tool

SLEEF has a tool for measuring and plotting execution time of each function in the library. It consists of an executable for measurements, a makefile for driving measurement and plotting, and a couple of scripts.

In order to start a measurement, you need to first build the executable for measurement. CMake builds the executable along with the library. Please refer to compiling and installing the library for this.

Then, change directory to sleef-3.X/src/libm-benchmarks/. You also need to set the BUILDDIR environment variable to the build directory.

$ export BUILDDIR=`pwd`/../../build

Type "make measure". After compiling the tools, it will prompt a label for measurement. After you input a label, measurement begins. After a measurement finishes, you can repeat measurements under different configurations. If you want to measure on a different computer, please copy the entire directory on to that computer and continue measurements. If you have Intel Compiler installed on your computer, you can type "make measureSVML" to measure the computation time of SVML functions.

$ make measure
./measure.sh benchsleef
     ...
Enter label of measurement(e.g. My desktop PC) : Skylake
Measurement in progress. This may take several minutes.
Sleef_sind2_u10
Sleef_cosd2_u10
Sleef_tand2_u10
Sleef_sincosd2_u10
     ...
Sleef_atanf8_u10
Sleef_atan2f8_u10
Sleef_atanf8_u35
Sleef_atan2f8_u35

Now, you can plot the results of measurement by 'make plot'.
You can do another measurement by 'make measure'.
You can start over by 'make restart'.

$ make plot
javac ProcessData.java
java ProcessData *dptrig*.out
gnuplot script.out
mv output.png trigdp.png
java ProcessData *dpnontrig*.out
gnuplot script.out
mv output.png nontrigdp.png
java ProcessData *sptrig*.out
gnuplot script.out
mv output.png trigsp.png
java ProcessData *spnontrig*.out
gnuplot script.out
mv output.png nontrigsp.png
$ █

Then type "make plot" to generate graphs. You need to have JDK and gnuplot installed on your computer. Four graphs are generated : trigdp.png, nontrigdp.png, trigsp.png and nontrigsp.png. Please see our benchmark results for an example of generated graphs by this tool.

sleef-3.3.1/doc/html/naistlogo.png000066400000000000000000000275711333715643700170620ustar00rootroot00000000000000‰PNG  IHDRVÉÉ0PLTEÿÿÿú¯Ss¾èéé ÆÑЬ¯­‰‰‰CCCgddiªÙWÊ‹ôJJþ——Zó pHYs  šœ.ïIDATx^ì\=S#É}3€?ª7ÊTåÔTWŽ6ãÊÉãªCB¢|u XH†²x7ÕÕj›HÁ ØMP –…°,$î\Õý—ÁÕý3ÓÏtïHàkš/KO"ÕhêgÞÏçí-L1ÅSL1ÅSL1…~ts¶®M0æ(¥Ym‚1C)ÍM: Úà”²‰‚Ö”Jµ ©nÜi“Œ¥%S“˜b²Ù(½­VÏ&²»ç %véØ{ x5T4,J/xBèSŽK0c±ó—Úÿ–ŽþÇ:Ú”ÒcïÝs ,˜œJiÆýú¡­¥»Wô§fÐÙ÷ŽšXURÚCEà£'ŠÄy²ïzÅ–Z”±¬÷šy}B)Ýäž¿å/xŽ äD›ð{n¬¥¥¯©‹àHË´}Ö|_`Ÿv½=nf)¥Ï,œk¦÷úRõpZZE‹Ÿ¤w]“i4,`¶h€‡* Ç< Õ(þÒ‡à.DØ8Æz3Þõ†ï–÷ßf×ÍM«hB¸°ª÷’Uðc…¶‚jnÈÓ\t)e®Ë×á&¤OqÄ)-iiÁ uña§ÔñWœ>ÛÙíz ¢ –»ª¹p×kÂTLÿ+ `ù¦ºña[K`ì׈L™÷s¦ÇÎÂ! Š‚ïl1¨UÙG-èº×ÚôB=»6ýƒ¨uukó¥PE±Ü3]}‘  £*»LÜçð·¢”Ó…?ï™â¬ qÒ СsÒªx88¨Š’î¢"2|ÒŒt¥Å9Ä÷‡!R&¶"ŒÄ ¦U椔€ÙàAXºÉ]€¹˜LÉ#iãö„"c$È^¹_…¹Øœ€Ó¦›Ÿ™ |Ç@¡÷q @ߘÚÒÒNóèÓá6,àÚ(œ°š WK`f*²e··7'–gºo|VrCÉÒáõÁñ€ÁY¾À»:_7††™Gì Îû<ÛÕü²‡4·ÝEÊÚ_Ñ0v%õ¤ö‡º\Z9X/êúR³yäd3D7Î]±PDº-þŽH±hKú IRkðÏîêýÍ Ð‚_H-èÎf˜>3¡•IÙ0ðÕšï/ú;‹n¼Jü‘ VBÈà2^;¾€óÃaÖ±Ü,ʼn}þnNÏòÞM×%€\%©06¬A÷nÓ ˜ÉyM|Ú•öO>m¾¥8¡/üÍA%>L”ƈ. @šŽ„1äâ#”;R­P G.[žXÄ7ï1±œ™˜îß C"`Õ*EùVóÏš_5\q¡Ÿa™ˆ pœ8?@¤¼?êTt¿»7ÕêÆíá«}´0sÞ+¢|Nª¢ FŒ†ç®ÅL05`` 2¨ºõ+¡¾¯ã‚VðŠˆæ²(Z@Ù0ßG@ƒWZ7ÉP¡ê>Œ¢P>$.ŸÁØ´5m¸"½ÑF: 0^ Æ}³¸*°`ò/VOaE{7gç‡&_öI°‚© †Àu‰¸{#Tr Í»!=Xî)í¾[—­@ ‘ä…鷓СLÑ/‰!iYµý+˜¾AÑéÖº¯3P»@ƒl‘Πèþ*·DÐ(þ梅¦ÀB¾«¸ÓfhH}nc&Ñ}mðRèW-|ÞÁNw3;WþÉ¥½›c1>=ЖݕŸúIðÖècàÓ üö+,Z+¦c‘ݣÑö¿SøÛÀP e ;5µÙDPeœ„чwbÿKklñ %§f1ÌÈÜ&Ðð«ÃRÍÐ£Ð Ôæu…å1PG H@)ˆ:ÀUÚlî —Æ´h¹+û§ØÑnKt Œi£KÙ=Åg€Ù é³æÃŠ@v š“%úÿ-îø``!r9÷ǵ™ô'•"òé'ø¿±À°)"ûW¶)¡p.ú’™9zòb ¼6¤‰öœ¨+ô+P/ÚôÞH¿û?#íÂðõ~’@…>ëß—ë_W)­~°C˜»p‡#wÔ’w õœ-T @¶C19.­¸¥&Ñ€®÷­FY{³ƒÝî}B\,ã"ع/ðqhB…˜9ê‘îݨ?Pa,[ÑKÈ.œšw/zè ‡(/ykQZ½6™£,Ê^ÈÞðsT"œHëäGÚ—ìß[c+ÊqÑs \‚ü(«2òºï~’š0ŒÐÜüÈDY}4›öðA‹@5ˆ¨–×OÄþ¥ý—¥L 2Àº‹ðdܬqM*®V-1é3h̽)ÙÙì2‹ ô{fC¨É¸Gbô´ÆìRr?dÆw´çBô­Å:¬&9_îG‘‹h~0§jº³6¿i#;äGV_=ŸŠl̪ ÝÜÆÖ¢°CÜ Õ×<â"sbßœºéÌаd÷[÷/üä¿ø<€KçWTå:د[ïã“åøþ£+¹97: 1hÆ—‹<“·b‚:’d+îñ †%ëȆQ”¤:'œ£;ØsˆFYþ¶²jÐuD@EPù¬$o÷B­„{’€\ö(dðËD6^ ±"nË(Hð¿cêäìO†–—·d±1ë‹mqùh†W”g‘kÐ]&bFÍ>í¼Š¸h…zc˜ý!þ ¥Ç£!³Cý¹m4’°Kw(-C¡êÃE.†Ý^òø±/Ë`)Z¾ 8“€R…®' €²ê¹¾*œÕ’²Ón_£>6ÍJ+Q [óŒ4ÐpçD œj n4”Àά°2¼ bÈ¡0ör©Ô 1F—=° ((# 
5Äв¦:Rzw–´ç`6èâLñÄ@ïâf\Q×'+-Þž`µªMLs“ÂIÂJ@­¡R›¾4ÁìxB~uãÚNÉSÛQþsjoœ ®N>êÐDd½º:>½!—0,™øŒq=—´’¦g™Õ `ÁSÝú2ñ˜0€µ±èL<ÄM$„ñÜW…ú‘ $OXa;ûpÔ =.ø EÃÛE´” <¶°½×oxnßLYI8–ÁàÑÑfh+M=Áú˜fÁßò¿˜ÓH›öRd—v›Û棶Dó˜ÒÓVÂØKBn Ë'”²ëGµßz¹€™¤¯ž ÉOš"þ¦Za[Ýlåœô…}üÌ{õ:¬†ßýISÄw?hjøõWê7ØbÍò.^æ<²¥­‘6Ä|óGE·#…?kjøúéÕ;r—n=ª‘51{ÏBW'àç¼¢ü¦ðD‘ªïU}@ª¡êƒÙ¨„×Ú(†”]€äóДð÷BAÍôÅÅà¸9ÁÐÇñcn‹ å øÛ|^- …/ÕBÀââ_F˜\âê¾:JZ#—Âp!Ô×óªÏ+Y¶^(žh*øÇââ¢r:76±k Ïå­l';zW%„b)+Lÿ¹#àßJI°p÷$ß/”°Íí{Yô6¥ŒûS».§x”“0#ÏQK‚ù¼¢üèð¥šÀ”A0%C^pxLè)àÝ›=ç}w–k!¯J¯ÝuÈ«$B½à≢,>5G~Î9²¢K€a©V2ºwgjƒ¾—©”UQ]ªy€‹ŸT’ [Å\Ø#ïeê 2ZƬDeér ¤FÑ<@)~Ç øAÁ<|5jË™B!tP6Õ ›åû}«„oòy5 |ý Åàל€§#•0Á!ÉkK]Ý7ü›PZAYT1‰þ—¹³ù«ºßø öæŽ ß,rc»ö‡Ô*R"‘Pª¶„vA….¡~!$ˆ‘Ê&›A0¶6c!ã¸ÙØ‹ÄÞd¤Æ$ñÆCÉ‹ ‹.uWUBQW•möAuï™™gΜ9÷9ÇçÜQ¿+‘Í=>Ï÷s¾oç$.à 08 P ¨wøf‹  ˆ40&Œ;b‚ï; ðˆ;ÖäÇ@a!·J ÐÛ›7l`¼7™ ‡jèBA€¸sªã,âúbó3ÃäˆÚbÁXp‚!ƒ 91Ì€˜ÎêoÜü÷óâ¹Lc ûÎàZk ð6®6˵ B¢k.ô|@ضî›|«Bò‚Ð @È h ÂSè–ŸӺø <Žmì` a1… @hÇÅ_‚6 $ ›@\˜cš ="$aì²Y—EWgb—m£`ÜØTÄî1s!‡ ×Ài|¾/¤w9ïZ^°·Yéu†ì@Í'@ 'À=šˆm‡0Ê3]4@T3„‚„¾Þ„ Ÿ½ä8›”XS«!2T4@ {È!h^rèn W„ˆ0Že A®@vŒCpáôŸœ&'äÕ·m\! HAÂbË2)¢¤gbau;„ ‚!ƒ @hnŸ¹Þn\laͰ À (CÐ>"lïÍÌ® H}a©XL rØk`joçùvtW¾À0jÚ@ƒä <³aI0Ësð¸·lr @„€ ƒà¬ÎÁx6»œß¼ÛC $ù1ЄSH‚Y GÜ—³w! ,¦.À>5Z¼±÷«kÏÙ¬ È@‚. Œ÷ö€º`ÔcAî¸B@ÐRü ­¡¯¢¼è¢€C ä à ä6sg%óÖàÞ ‰`\   rÚ¿›Ý1ôn$ÀÀ‚Ѓ @H ˆˆÐ½ü‰#É BD€ W'VÈá^ð¬•Í1‚ P-&. t5™L*$¦BAB+Ð7ùˆ<© ÂŽ2úÒÀ>ôÍñâHT·ƒ añy' €ÅÎ5MËC–äxóy­Õ)ÝAˆKÍò)  0„‚ˆÝo†U-”vJ´iìOêàƒ 4@ '›œíP‰–î4]@ˆH.À7ñf—‘}ào_I€:9ƒ @H ˆÃ û›]Z÷p£oþÏ&ä@$ÈAú×@QëÝ÷K¹Urd& €Å ‚p\î ¯h'§p Wê,Ç š‚î œÙD­CÓñìbGãî. Aÿ .UpÆHaµE¡‡q7""$„yÃ#IØ{™ÕÀh-Û$læ. ‰Õ‰ páÌ ‘\È­Mvä 9B@Ð7¹¢dªÉ…À®9C  Q÷ B$ûæi^r²ïfö˜BÓP€_Ncû‹^p– +9™=‰ÕAO D*´g7/dÏàqëq. 
ä<.:§Aw»U°ùïÆâJrº+.„}/@È!øÒ·„‚'î„Ƈ?dûu‰Ñ°µØ$Cˆ޼#œ‡à?>êh ¸O<àihÀvûÐÐ&F÷[ëuÈ£µÄYGBÁøéCÆÀäüj/w!G÷âÞAs°Vö!BÁàüîÖ¢c$ˆÚP¢+„‚  XEÂj—‡'F‹é×®*ŽÁ ã‚Р> d °›¥Åÿ\ò™x@¼ÙZš4ƒ ÀÂ79Ϥ9ž²r«ƒUxA,Qm½¢„H>\À#±FG9;œx‘@0ýòÀ„B܃u¶d/Ø&³‘|,Tr$MNÁ)?68Zp¿Úñ± >WÏÇK˜»~ò1…`÷¬Ø .@øIs Õ{ §Ê‹j_ š£‚[;†ø°ãëÝ£Â?y héÂJBÉÆÖäõèqr]ŒKb7¸B°»MŽrÐq/˜DÁ}'°ÎßNéÙÖ"ÀS.Dü+²^D¸`~Lœ€u¦¶Ó(Œa!ˆ}òo¢tP2 ¸æ jëW°Ž„§8l” ,R <@ÎèÕ‚?;§$(Ic¸ ‘4zg; ôûl™le'€|0ê/‚H"kfßÍ99׈µ/÷EÒ •'B*?%uQ$Ž_ ܳ8ŸŒÑm]ÖÕŒ'Îo²9!{ÂY6*œÎ^|3"BÛ¢@ù²æ½¢ƒ¨› ,p‚ *"_ ð’Ðýç³5óÛ•x¬$4Æ_KK6ÍA ‚è{9„uÔ_Áè]€½0§# Åÿ…ÐÁY?DM0„Çû¿ Ð)€€Œ’Ò?ïä¥jµV÷AÔ 4|Œ ­ÞGR+ƒ¡ùMÏ×=A®ýÂ è ˆ‚t $i!…qûKµusžZBp9 „1¶µX€²!'ðÀö´ݪö4€Ðmå~W_DM0„Ǥo‚ô.À„³=¼Ï´åšÇlʼ϶–8R쀾Ѕà‰Î` -ò‚åµVRò“S éz¬–¢Ú:ýaöDƒ¸!ƒ €ÔÃlAØJ7z;U¿Üú°š*B\Ǽƒmà‚‰ 2B¼"bM ßÍ_(•E’t| }¾ë#£-0Á1'˜‚ðƒ`ož'´a(êÁ³i”»¯‹\:Vú² _éPyDâ—p áÕ âì@Šœ÷TêÿŸ”`£RšHò¦°ÚNß“»CÂlžy_L”ÇâôÓ›€Ë_$ŽQ² {8¬ ºCáHg@hºDIl;õ–ûÉ8C Ì>ê5€$¨áãt®wßMmfc³Ôgu?Ç@4Æ„‚XASÂÕ5Óu8˜l`6½Œ%ˆbg~'CPÂã±Ah€·ÙÔÅ%©A«t(XUàÐÐAÙ/…i"Âe„‚– \­íÒ³Y³óUÜâ‚z'Д]4À šÆ7& ´.L”0Î`A=+‰¬²"o‹(xÖ nr~÷}ßÔƒð%‚p‚V(Š=5œrÁÀÉ'öAh€A °a;]€'‡7 ‡ÕÚNìµ1FQ@H ˆ ´á…tƆ_/ZdyBw„úHP§¸†"èz1’T ææî1ʈ@È Z‚ðB²Aep$~R$̆ä¿}æB`ˆ™ €xüi B̃D… U^È×v–§ÒÆnÈ‚!ƒ @H`Y.Õ …‹“CªÂÕî=ÌýCY‚8ѨN€@к<0uƒj²º¯“Ôý`!S4ÀÀAÈãÄžÕ¶v–âYì'"T@È ˆà3B„ü€ ZœBAh€»B%¨æAÍœ @H 2Ú×ç7¾ÞYRkF²•ó @È ðŒO˜~ДNŠ=ë¡ ªNà0ƒ @Hš_ª<¦¾±Y)Éö¾×\ˆ„/ê ' X>¤}VŽ+$%jAíu!ƒ 4fƒ°n×#5Qxoyw“$ÄÉ1P: 2`ABž‡«e“`z~y·‚`Ñ?BAh_çB´BªI±°ÒÛÁÜź_êAøPA€G'€ÚÌ-]Ï©*®! 
4º‚0â¯míÒ§ÓÜ!ˆ+ä5 d ù·q"ê9·$II+¬ÿHP¡ ê<¼£Nážå°EFÃsƒ @È  àÓÜ@8ÛwÑø«~ù‡  BAh›Û „xz1­a3!Ê­a™àˆ' D;xÿù¿9,gT­­åzc @È òÑ “9By48¬ .À'ÍîÓ4—Ý H\@X—cô Ä!Û‡ú`EúþÒv·¸\÷ئ×ÀÏ™„!drávaÒµÞðÜõf orZÔÉgS ‰u¹“ú ú©›î$Ãò/C,dXþDÍ DS4bù8ŽÔÐuWòë"à.Q§à ¤ýâå´6Ä#̘œ@rð}¢'„1 0  Dî/ê½'=.©#É=‘,ÃÜ*\…cA€j‘ Ñ@ÓR‰’—Û¤’>ðÝìÚè·c ìX2ON (ìo¡ƒP8Ømå¸.:ê·ÓV¼r!°‡?½ð‹ ßúÕÀ{"žH•¬ˆ}ÑL è® à7ÆÔŸéä΂ûlV¼s»·S \é.Àòq/h'±P !i©ôQ ³â:Ì䦈ەƒ ßuáõo TEõtƒ”±ù…¨ÅíJôΤ¸ƒߪˆ/»[P`L° Ç1Uñ“bÁ .Lù‡Aç¬Èüð÷UÚÉìl„pžÐ[,äôˆÅ0À×à ïL}ÀÂC€9B°ãPRPDq!3P ÓCv¡\r!(¤ ¤LÌ„ÔpH ‡+ J®<Ã Ø +Q&ö¦;ù\qy7(M/Öw-‰}J Øó(?3¹2È/6².Ü-´c­Î‚] PŠŸç„¥jukçâÜ䤶Sl\ü×vÎlÂ¥|Å]@h–‰³Jc¢6¶³¾ÔPÚhÅŸ¾¨Ÿ— ¼cHêÿ¡òêxµ¶uuin2–o“çቼ hŽÄÅ*‚o ,–`Ø ý˜­·óÍ…àoùÑälBÜ0Æ¥(9%¶´5{ƒ`Ü¿§¾b]ËcY |¾)n\mG+þúBȬx1)‘bìä¡Ñ¨í ¦®ÜÝc$£5LcjkXR XËcÕ’AG`L"A‡\ˆ:&÷h€ÌŠä¥<Ì/}«Z)‘Ã|G‚ú1¹×B½[}~£EÀwA@?&’YñТ<À*ÂÕRiýj²†û's† õ%4 U¡ýô@±’¤Æ&çæ—×T™ll–ýAÌŠ¿¦_8sBû¦É…á´ŸœOû†£\#AüŸŠ8Ì è5"D-¬®ŽÏ÷Þà‰m!xÄ‚åª8@HF¥«¬ˆÉ«“×»c6‡z~¬Ë­a¡þ¸X€/À{ÊK°f¾‘ ªâ!Q€%,ÀxÆ•ª+yG‚¨ˆ„dV<ôS'DZxB¹k¶m ±‚ à„Sb4RuÛ¶/Ä.DU\hà?‚„¦ÓW— H0Ø0Õ?;¸ÞÈ+Ö©!¾À>"Ôh ÕK‡M_</t3$;KbÜ!ÈÂTbB@á1v)U«ÊÈȪt—ÜÈ zaI2ƒmñ‘ÓŒŒ=B¢7")¬ŸiKcTùB­a! [Dˆ¤°þ¹=ÔÜ EK„ ‚Ö5Â1ɉÊßÛ–^—ôA(@z ”!èB²·Ûhߥ­aþ!U€;ë»}é°ä2‹%BA/ [¼:„ð­ô#… @Hà5"œV¯É)fôˆÞ ¨>¬ÂÇØ8ØìËŠ­Øx:T?+r„ÜT’𑲳½œ`š1Ì?”AÈ è?"TÞO\Dýâî‚jDˆ·$=ƒðbŽÖW«h˜•Ôh–ûzÚ„­ÚZÖžÛšáú“¹T PØ7Œ)¡À9ýíéŸlʳQA‡Á¯]@ˆq‰sº€wAW3=3BA7¢8¾µdr™P‘ W"ÔxïÀ6"TÓ¾ÑÍaoMí“r‚[ëyUÅ¡„È…'[ïŽb õFUdÅ øÈ‚!@4`;B¤Š¼z¯1¼}¤†¬Ø¨ ˆˆ@Ð1"<ß’½\¬TJªµ{IÛþè‚;B@Ð+ñ|RFkL¸µÞˆó¨Šs"" ë{‹§7ÀúqÖä‚zà0ݺŸ²šÓ8c{Æ‚¼*®! 脘™ p-ûó?êùU:€ü‰^—$Riú…`!‚!à„°`¡Ýɘ—S™l‚! 
kç4:¤VfböúìõÑA ôÔ,´3\`ïÕÑ÷GèภàN€Ï®ê†‚‚¾Ç‡'œcx$¨F„Üø˜#üPsÖÌnE:)yêå.N@Açfù5Á'PTÎÒ;3?ŒÈ@߈íuŽðT&ärJ—œœX³I‡þˆ7Æð;“^‡¨|ž†…¦JÑÖ"ÃÁ–åP)Ú‰GA8¸U˜{jôUÃ7ûeaxk)»š`hת×~&̽FˆÛ4«é—q˜Ò"†¤ÙH!ˆˆð±™¬AØêíí¥ùd1šÚÂ9’磂 4€ê„!¶xTA{ˆzÁ~ùÖc$ˆáæLöò Bü‡>4Õ’Åo«€"S‡ @ ¸Ñ,3VÕ÷Ç`nœ°‹ϸE‚°¿#ÅBÌz|b§œ!éYˆ„éPÕD.ãtÐk*Ãe¥jöŽ×c ‡`¬ïs¡°ùÝžâœx$¥ÉýBQ|;+Ôh0/<Ñöà>YMc ½.â¡óa°œÄ»7È.Ë ‚Oñ;“¾Ï „°`fy‘|n¤¦ˆßçB+Ë-ÊïLzÆ„©Qb£‡ j‚A^áÌÞÖaæ :AP(?Ûgj4­”Éä Á…`b ½4ËüVà?ºì €ü!ØûÅÏy¡ô´ ±<cøIù€ð×¥®sP@ÞÄoö~¬L p5AªgüG„jCFA®+¡0Öη@þ4¹8Ð?Ñ!¹Ó‰†Ç}(à9S·ƒ ob*6:Q/Ì´:nS€)_2† Äì„èûsrÐ+Ðô§ûÆ®Â#>A˜ÌDŠßðœÀÿa'}“ìxÙ6Ò..à»aéÐ 3;ú!|à7ɯøo{g¯Õ8’öq'z-£ Þ$ÝÎ^@m`º1É*lã¤&HD€›f9€nšdèžÄ†¦Iw³½M÷v»T%ý% `Yušçô™ÁöñýôÔó]%5/üë&à?±ü÷)t£1Qnÿ& ûõÏ•…ðëÙ4O{ÿ™mWá~Á0Ot@¬Ÿ€ƒøsðS›šI޵PKZþÛg`f†Z{f²èÆtÒ¤žîÂθÁŒ;jÿ¨è+8"©ƒÍ‘þLj57šº‰XP>‹ýp1J ¥`?ýöã§ÏØ)2Óʇú[Aµꎙl qG´­ùÓïÓ4zKs–š äSãšKu×SëH¡õ'°§ö.!è'i.Æùl5a5M jú/µâfVÃi_Üimvb§[bÃðÔð@]¿'´G_G`v¡¼oNðŒ™É‚ÙYz¬ÃP[ã×aóÖà3û„´5ПÎíå'¨ÆŠ£©éƒ›'r… (u`úºÛ›Ouˆ³!Ü&ŠŠZ®!Ÿï›gâ¿uµ‚Ê—·ç@YhÿGâ…®Þºˆÿg¶Tr°½ WSÈg¨?‡õ ,¹™Ê¯è)§Q@7_3•®y-¤Ëfü^X:‚ 8úæ}Ät³H_}å‘–‚Cò:læPpgÅ”kyóÿþýá…ë™ûiôá^¦…t¤e&8 roȦ"p6c7E=~ðÔ¥÷LÇûHoW°O”zæ=Á¢‚˜Ãgn&x:÷ÄQЫæ0„}ôDt̓pLmCXs3m$®› ¦{8Gû`| jC‡ºLäoóÅÄÆþÕö}Ê1P®ÕH@÷ýÞg¤ ag¡¼"ÐiØ<ºlÊÑ[[(·vtª…„‡huæO«tÊŠØMB¢‡‡.ÔhÕ)¾ÚŠgŋؚ9—ú˜j^k` ;VƒÃfÔû}×G$ÕeÃ…²V]l©^͸>Gšt€|J©@z0ìíìK‰zÊ€ªÞ2×ÀZ²ÂͨéB•c­–Ù ì?~cñ°’n¾@@I?}`ÔK99‹Ãb  l6ÿ 9¤#‘”ÒÂN‘ >ÔØ^$¤FN%ß(« làˆ,¹v-\ƒõIÕ(б[¥n´Y R/®ìüºÀl‘‹ ×SI‚ÍHOÑ·˜º‡‹ ¼Wßí»$ÆiºÀYÂüõkSŭʃy‹Íš#î•¶LÖk„K_öÞÝ.ÀÖ«C ÿüÈJüû²Ënjז‹ùðEŠkÔKUš»^ ÔH0ö˜\ê-l]¯ã½ÖÒ•¯åÁà[Û÷,q³ß)º.(@)Ób•ð¨.Ø “öÓ‹ˆéÒ³DIÀ~¸+[·;‚ÍîÏ!Q²p-û¨è5ÑcYh J·7t$Wn:úa ŠÉ®µ¥\«ÃÊæý¤‘²XØÖ%„vù‹6)M¯l6ÐGîeBï..^Ìe+ºíø©\ò¦¬´µ-Cg©Žà¢².§ðØtß¿Œ¡K‡+ h•¦ âÉ»C™ÈÚÆéR øU”EðÒŒB¶U¸:2\Ùë¿ð\R‚m7Œ2ªƒòØ ³^òBZX¼%à¡vý‰ ¨å(ƒjÞâÃa/o­¤–' vÔÿmB9bÖöËk@+ùh^Q"•£v¯¢4Dèý—u„("Ú¶Jq¸D-NÌúƒbØËH7P{…ž±S‚ƒlŽ?‹rTÄè’ÎÊAk›«O‹‡;’šé¡9úÒÛœzÑå‡.ézQÛÈ[ù,ÜÐzñ8ØR¦n ±öX\‡m­|OŸ, ¯%>X†Áíž|ç ²úp»™ÔÒ´9ÚÚK*<éºkP HWƒØ8Kj=Xx¹zGj ñŸÃ"í\[J÷f¯NJ௑B p±–Õ~  ¸Â8ލM¿u_>¤¡ƒ·r¿ÄZ³lËg)¹·¨2Þ »R¸äØDÈrò”ý¯~bø]«Å–®³¥ô >'  ±Â¦8¼¿ÑG;°n4LÍ#_ùöp,Ì-²}\DB1z`’6[éÆ˜û¤£5X!kÏÂs×atŠNQ ÁhL1ux2ÀãiTo5ŒŒ‚_ÈÒ£<P[BZÔhÌ6Èx[C½BN)u*qÕ¡ùâAqõÃÅÅÉÎt@ž@¾á°BV@ HÓƒUh4*ƸŠ^ãæ™·’ï‡W„ 
Ç4E+ÀæIÔ› FÚU—P–ÀûÛN£±wéz–ÇÝç‰Ñ­j1ÝÊCvxAj|H0@“\‹¯xyΉ},LI˜~ÚLØ’Bˆø'ÅG o޾)0[“+ÁÆ H°&Ø%#¡³£¦KïýïÛ\¦Ÿ"Æ3ö¤ÜÒ÷ú^þÌ ?Xwô%.XpTìa³4›çèSo—|'­¤Í]åUÒæä–²Í2?„Úvô%.X¨£:d˜ ÀجO>+@Ÿú"ÒÞæÊÞz tjÙ!=mµZêKÕÆü4€;É0]ëƒ7®Í²”èûÓ^s5ÀŒ<ðaaœ=yÁ €Š YÀ%Þɨ 'àTn)ÏP>b€Åüîä8G”:ñ¹±+pTê!€MäðÞ¥^€uˆ‹="þ“êâcàƒ@2=K¦2T»ˆ„ K¾j_!÷I@\y“•<  )>P4 •ri=‹8íK^PyѯI¬B÷{įl’v€úþ.Lèóð{lŒO…Uáœ8\C"˜`º6€@¸!Ö¿IœÞ.ñ§×€ué‹“ž\ò @ ‰` €qK†Y*Y1ÄUpù&kþŠ‚Çr¨ÿêJP)Ø`9 IZ¦k…Wãå,u2œRàŒ ”škAù(V@@=¶ä€@¸ªÒèi#8ª“ߦ׳ø|MµävQ̨¨7ðHæDOºA¡!›N°M¸H1¨¼(  ¢!à,ž#âɈç)&± ¬Ž‰€i¾Ò’ôðIp„)¦tÒdHBžÈ‡¦Ðþ*Pö$©Y@M„SºªŽØ¦4µÁ?žD0Ž0 )ay¥ÞS6‰Ãˆs<­¬# ^ ÿi¡#¨Í0²~ÒfTäºzK:|J hr¶ÊëìJ¤4 Fè†êV·2dEÄöj.<£5Ás…Z9à“ª¢oì·òŠ¢Qú§ҳÃVzr¹—@,‘RÕ—>ÒÐ侂tù4`ðÊ*8•ÊœèmÔD@Õ6è™ñ™/,GʳcùdF(àÓ ¯šñÇà™8ö™qš!Ð/Uˆõò(G¨†HAMÄLÏ·û2k@5lKå€# õ¨…ÕHë) ±J©8¸&ÈG( -H9î{€ÊÆ€twªwÄaeÔGx>8BðÀa‰Á@€¿uÉu‰5 b¼7?&Þü,†è¦G¥þþÀ~ü«ü b¼ûpùpñžU~V)àa·¯ò*¯ò*¯ò*¯ò*¯ò*ÿ Pãö&±IEND®B`‚sleef-3.3.1/doc/html/naistlogo.svg000066400000000000000000002463511333715643700170740ustar00rootroot00000000000000 image/svg+xml sleef-3.3.1/doc/html/nontrigdp.png000066400000000000000000001352671333715643700170710ustar00rootroot00000000000000‰PNG  IHDR óúX¬bKGDÿÿÿ ½§“ IDATxœìÝy\•eþÿñëÊr‘EvD\ÄÐ)Ò`-)- É%kÔ\ (+EÉšfÔ4sF¾YSiDF™!2ŠÙ¯E[P[\ ÃÈ¢‚(0¶óûãþv¾gXÎ}Ï9çõ|Ì÷}Ý×}]Ÿëæ;ßǼ½—£Ðh4€¾î†ž.s ,`À‹@X0À"€ °`€E ,`À‹@X0À"€ °`€E°Z½zuO×€˜0a‚âøƒ™Ïí)&ª™aЃ;À³:sæÌŒ3ÜÜÜìì솾dÉ©=)))<<¼gk3©›o¾9==]»k¬õšhØz×°tǺ§ XF3mÚ´I“&åç绸¸8p@:4kÖ¬ž­ÍÌL´Þ>/^,--]ºt©···­­mppð“O>)š0aBZZš´=vìØgžy&***$$d̘1_}õ•Ô^YYy÷Ýw;99nÛ¶M¡P\¾|¹Ã?ÿüóO<áïïïââ2}úôóçÏw.£Ë>UUUîîîï½÷žÔgáÂ…“'OnkkÓ3f}}ýO<1tèЄ„„;vLáããóùçŸK*** …Z­~òÉ' ’““‡>{öìë½xñâC=äáááéé¹páÂÚÚZý×AKÿ°cÇŽ]¿~ý¤I“ìììBCCøá‡wÞyÇßßàÀ‰‰‰Æð+fÈ_G—SëÛåÅ;vlrrò¤I“‚‚‚öîÝÛÝ…ÒõÊ+¯ :ÔÉÉÉÝÝ}ùòåúר夀>Œ 0ŸÁƒ=zéÒ¥»ví:wž{÷î}ÿý÷ ’’’æÍ›'5Ι3ÇÃ㪪ê›o¾Ñ&Õ~øáüñرcUUUcÆŒ‰‰‰ioo7¤§§çŽ; ÓÓÓ?ýôÓŒŒ +++=cΟ?¿  ààÁƒõõõ{öìqssën9›7o Ù¸qã¹sç²²²:3gNSSÓ™3g .\¸0þ|ý×ÁÀa…Û¶m{饗jkkCCCï»ï¾C‡åçççææfffîÛ·Ïð+Öþª ŸZWws÷îÝ………³gÏÖs¡$EEEÉÉÉŸ|ò‰Z­...~àô¯Ñð¿  Ð`F555Ï=÷ÜÍ7ßlccãããóꫯJíãÇóÍ7¥íÐÐЗ^zIÚ–îò]¸páìÙ³Bˆšš©ýàÁƒBˆK—.éž[^^.„(++“ú´¶¶:88œ:uJ·ý}þö·¿1bÀ€_|ñ…þþÒ]ÄsçÎuX ··÷gŸ}¦{î•+W4ÍM7ÝôöÛok»ikþñÇ…çÏŸ—Ú 
„?ýôSwסÃtÝ +¾qãFi[º)-U¢ÑhæÎû—¿üÅÀ+ÖyXÙª ™ZwØî.fhhè† ¤m=J«¸¸ØÆÆfÇŽjµZÛx­A@Æ;À³rvv^»víÚµk¯^½ºcÇŽ¸¸¸€€€iÓ¦uèæêê*mØÙÙ !*++œ¥ö!C†t¼¤¤D¡PDEEi[ìíí«ªªn¼ñFû$$$lذa„ ·ß~»þþjµÚÚÚzذa×q1„¢¼¼ÜÚÚÚÏÏOÚ•,//÷ôôìò:¸»»>¸———´¡T*•J¥“““v·¡¡AÏêt¯Xg†T%;µ®ÒÒÒî.¦···´¡ÿBI†ºsçÎW_}5..n̘1ùË_î¹çSÿ½Ð3lmm—,Y²~ýú“'OvÀy{{ÿüóϵµµRîòUU)õÕWzžeÕÓ§½½}Á‚Ó§O?räȶmÛ/^¬§YYYkkkqqq@@€nû€®^½*më¾¢|à ]¿väëëÛÚÚZQQáãã#„(..–»«¿ƒî†5!WÌ üýý»¼˜B…B!mx¡f̘1cÆŒ_ý5==ýþûï¿|ùòµþ}ïÌçÒ¥KË—/ÏËËkhh¨««Ûºuëùóç ü!œáÇßzë­IIIMMM—/_^³fMç>~~~ÑÑѱ±±ÚgwíÚÕÚÚj`ŸuëÖUTT¼ûî»Ò÷¥ôô÷óó›>}z|||yy¹F£)**’"ÙM7Ýôᇠ!Z[[SRR´ózzzªTªÎ5Lœ8qÙ²euuuµµµIIIÓ¦MÓ½«©_wÃÈ+fÝ]L]†\¨¢¢¢Ï>ûìêÕ«666nnn …ÂÊÊêZÿ‚YYY©©©f[;ÀœÀó±³³ûå—_þô§?¹»»ûùùmÛ¶mûöí“&M2ðôÌÌ̪ª*OOψˆéûF666ú¼ûî»#GŽœ4iÒ€Æ÷ÑGio!êïóå—_nÚ´)++K©TFFF®\¹röìÙ¿üò‹ž1ß{q#GFDD8::Μ9Sºßû /=ú¶Ûn»ù曵“®\¹rïÞ½ƒ Ò}W».++«‘#G»ºº¾û_R=ÃÈ+f]^Ìd/ÔÕ«WŸ{î9ggçuëÖíÞ½ÛÞÞ^\ã_pÿþý»ví2ýŠ=@¡ùí§èE¾üòËY³fuùC8]â0 ×ÈËË+**BTVV>ûì³>ø`OWz>‚è5.\¸0kÖ¬+W® 0àÞ{ï}ñÅ{º"Лð4À"ð4À"€ °`€E ,`À‹@X0À"ôXnkk[±b…›››R©Œ‰‰©®®6¼ÏO?ý4{ölWWWGGÇÈÈÈÜÜ\óÖè}z,oذ!333''§´´´¹¹yîܹ†÷IHH¨®®þî»ïªªªÆwÏ=÷´µµ™·|@/£Ðh4=2±Orròã?.„(,, >sæL`` !}‚ƒƒ…gΜ *//÷ññé‘…z…ž¹\SSSYY&íÙÛÛŸ:uÊÀ>+V¬Ø½{wUUÕ/¿ü²uëÖððpooo3/лX÷ȬõõõBˆj[œœœ¤FCúÜvÛmï¼óŽ———bÈ!ü±B¡èn.›ææfc¯`>Fyx¹g°£££¢®®NÛ¢V«¥FÙ>íííS¦LùãÿxéÒ%¥Rùæ›oFDDœ>}ZÊÃ577›í1o…Â|”›m®>¹¨¾:WŸ\sõ¢‰˜«wÍÕ'ÕWçê“‹b®^4sõ®¹ú䢤¹Œ2NÏ<íâââíí——'íªTªÆÆÆÐÐPCúÔÖÖž?þ±Çsuuµ³³[ºtissó‘#G̽@¯Òc_NHHHIIQ©Tµµµ+W®ŒŒŒ”¾€••••ššª§««k``à–-[Ôjussó믿ÞÒÒÒ!<ÐAàU«VÝÿý¾¾¾ …"33Sjß¿ÿ®]»ô÷ùàƒjkk‡ æââòÖ[oeee >¼g–è%zìg̦?Ï[ÌÕS1Wïš«O.йzÑDÌÕ‹&b®Þ5WŸ\sõ¢‰zé\V«W¯¾þQ~ÏÖ¬YcÎ5Nš4©ïÍÕ'ÕWçê“‹b®^4sõ®¹úä¢úê\}rQÌÕ‹&b®Þ5WŸ\”±bw€¿kÆŠu=ö0æD“˜0aBZZZOWÿcÝÓ0«3gÎ<ýôÓß|óÍÏ?ÿìííùÖ[o !¦N:jԨ͛7ëvž:uêþýûu[>ûì³)S¦Ø~×]w}úé§ xþùç·oß^^^®T*o»í¶”””€€€}ž~úé>úèÇtvv¾ï¾û^|ñEGGÇë_»®ãǯ_¿þ›o¾ùå—_üýýg̘±|ùrWWW#N‘””,m;99ÕÕÕé-//÷ññé|ÖáÇŸy晼¼<;;»èèè·ß~[ÏémmmO?ýtzzzccãwÞ¹uëVwww!Dwí]{äqáÂu/÷Zxy‰×^3ëŒBîE£ÑL›6mРAùùùW®\Ù·oߘ1côŸòØc5é¸ýöÛ oÿðÃ;õá‡þôÓO¹¹¹NNN³fÍêÜçâÅ‹›7o...ÎÎÎþòË/üñë[wGŸ~úéĉ‡zøðáË—/gggk4šxzKK‹!ÝfÍš¥ ÀÕÕÕÚËòÈ#Lœ8±Ëôûí·ßN›6mæÌ™*•*??ÿÁÔú† 233srrJKK›››çÎ+õ﮽kfN¿BˆŸ~2÷ŒB0`Q.^¼XZZºtéRooo[[Ûààà'Ÿ|Rÿ)VVV¶:n¸áÃÛûõë×yÀ 
&4È×××××W¥Ruî³mÛ¶;î¸ÃÃÃ#,,,11ñ믿îr-=ô‡‡‡§§çÂ… kkk¥ö±cÇ>óÌ3QQQ!!!cÆŒùꫯ:œ¨Ñhyä‘… nÞ¼988X©TŽ5êÅ_”Ò¦ža“““'Mš´wïÞŸþù‰'žð÷÷wqq™>}úùóç»\©öhéš´··gddÄÇÇwyµŸ}öÙE‹=õÔS~~~~~~wÞy§þÓ·nݺbÅŠ77·M›6érÌûî»ïäÉ“999qqq]†[­×_ýË/¿Ü´iS‡öâââC‡½üòËNNN...)))üqUU•t4..ÎÍÍMš¨¬¬¬ººZ÷ÜK—. !¼½½;O§ØØØXOOO!DEEÅž={¶lÙâîîÞ¿ÿuëÖ={¶  @ßuüÍÖ­[-ZÔ¿ÿ·êêêZ[[wîÜùÎ;ï\¸paúôéwß}weeew§××× !¨=êääT__ß]»!åõm`À²8;;¯]»ößÿþ·Z­^½zõÒ¥K»‹©’?ýéOù:œœœ oŸûÌ××÷êÕ«¿þúk‡q&Nœ¸lÙ²ºººÚÚÚ¤¤¤iÓ¦IÏ'ËR([¶lÙ¶mÛòåË ¥Ÿ†ÊÈÈ0pX??¿èèèØØXéæð•+WvíÚÕÚÚªÞúúúŒŒŒ„„=}}ôÑ´´´ï¿ÿ¾¹¹yãÆ---S¦LÑszBBBJJŠJ¥ª­­]¹redd¤¡»kïÚ!ú+7¾¡CÍ=£DÓ×YÂ544ÄÇÇÚÙÙ9::†……íØ±C:Ôùæ÷ßß¹ñí·ßî²³¶ý‰'žÐS@[[ÛŒ3<<<úõëçååõÐC;wN:´dÉ’;î¸C*²Ãà666‡ªªªš;wîàÁƒÝÝÝçÏŸéÒ%©=44tûöíÒvSS“âìÙ³O?vìØ½÷Þëììlkk˜œœ,`Ȱ¦¾¾>))) ÀÁÁaÈ! ,hmmí0Åøñãß|óMíî?ÿùOOOÏ––=×§½½ýùçŸ÷ôôtppÿöÛoõŸÞÚÚº|ùr{{ûèèè .èo樓ëÒX}˜BÑ÷×}˜±b@,`À‹@X0ðßÛ¿ÿm·Ý&mO˜0!--­gëéì÷Y•áf̘±cÇŽž®â¿×Û¯C@_pæÌ™3f¸¹¹ÙÙÙ >|É’%Bˆèèè tè9yòäÇ{L1uêT…B±mÛ6í¡Ó§O+ iwêÔ©O>ù¤žI5ÍSO=µfÍi7))Iûƒº¿ƪjÇŽÖÖÖºí?ýôÓìÙ³]]]###sss;ŸÛÖÖ¶bÅ 777¥RS]]møøkÖ¬INNnnn¾þ%tpüøñèèhWWW;;»   U«V]¾|Ùè³è^'''Ū¨¨è|ÊáÇÿøÇ?ÚÙÙ9;;/Z´Hÿ¹Ý][C®¹VÀ³BoÖÿŒxΘÙp`ôzfÚ´iƒ ÊÏÏ¿råʾ}ûÆŒ#„ˆÿ׿þ¥V«µ=Ïž={èСøøxi7((èÍ7ßÔ}óÍ7ƒƒƒ Ÿ÷ÀÍÍÍ“'O–vgÍšuM§WKKK—íÆªÊÅÅeéÒ¥7nìОP]]ýÝwßUUU7îž{îikkëÐgÆ ™™™999¥¥¥ÍÍÍsçÎ5|üÐÐPww÷ýë_׿]Ÿ~úéĉ‡zøðáË—/gggk4šxzwW»3Ýë_]]Ýô›Gydâĉ>>>úûí·Ó¦M›9s¦J¥ÊÏÏðÁõŸÛݵ5äšk•?øË8wÑÜ3JÀèõ.^¼XZZºtéRooo[[Ûàà`éÎíÔ©SÝÜܶo߮홚š:~üx) !î½÷Þ’’’‚‚!į¿þúÞ{ïÅÆÆ>ïÞ½{§L™¢P(¤]݇]ëëëŸx≡C‡0 $$䨱cÝ5j½öÚkcÇŽÕî–””XYY•–– !~þùç'žxÂßßßÅÅeúôéçÏŸ—úŒ;699yÒ¤IAAA{÷î}å•W†êäääîî¾|ùòÎU]¼xñ¡‡òðððôô\¸pamm­vœgžy&***$$d̘1_}õUçÅN:õ2dH‡ösçÎ=ðÀ^^^J¥2!!áâÅ‹UUUúlݺuÅŠ!!!nnn›6m:xð J¥2p|!DTTÔÞ½{;·ÿ×ËÑh4<òÈÂ… 7oÞ¬T*Gõâ‹/JiSϰºW»»?Jº×ßÆÆÆÖÖÖÖÖ¶½½=##Cû1ºž}öÙE‹=õÔS~~~~~~wÞy§þs»»¶†\s D@¯7xðàÑ£G/]ºt×®]çÎÓ¶ßpà ±±±Ú{¼ÍÍÍï¼óŽnêèׯßÂ… ¥»wï3f̈# Ÿ÷ĉÝÝ\?~AAÁÁƒëëë÷ìÙãææÖ]£ÖC=tæÌ™üü|i7==}Ò¤IþþþBˆ‡~øÇ|xòäÉÿøÇ?BBB|}}_{í5éÐ’%K ¥{­ÙÙÙ---’áŸÿüç;v\½z5555..îšæ½r劣£cçö²²²}ûö¥¦¦2D¡PŒ92  ËFݳ ýöÛo !4Í;ï¼³xñb!DEEÅž={¶lÙâîîÞ¿ÿuëÖ={Vºk-„ˆõôôBôë×O£Ñœ8q¢®®N©TÞrË-ª*..>tèÐË/¿ìäääââ’’’òñÇkïÖÆÅÅIü¾ûî+++Óÿʨ®Ûn»­½½ÝËËËÁÁaïÞ½o¾ù¦ö–¸¤¾¾^1pà@m‹“““Ôh 
GGGímX£,çÒ¥KBˆAÝaµW[ÿEÖÖ­[-ZÔ¿ÿíuuu­­­;wî|çw.\¸0}úô»ï¾»²²²»s»»¶×Íû*0úggçµk×þûßÿV«Õ«W¯^ºté'Ÿ|"„ðòòš>}zjjª"55uÞ¼yvvvº'6lìØ±/¼ðÂéÓ§ï¿ÿþkštРA]†ŠÒÒRkkëaÆÉ6v°hÑ¢÷ß¿¥¥åË/¿T«ÕR=%%% …"**jÔ¨Q£F=z´½½½6’iSÜСCwîܹmÛ6ooï[o½õ£>ê0xyy¹µµµŸŸŸváR£´ëêê*mH×§¡¡Á+ÐÞÞ>eÊŸK—.566.[¶,""â§Ÿ~Òí#ýA]]¶E­VwùÝ©¯¯wvv6âr¤lÜ!X2¬öjw÷Gyï½÷´©Ò}ù\׉'rss»|þY©T*ŠØØØqãÆÙÙÙ­ZµÊÆÆæ‹/¾èîÜî®íõ_ó¾Š €>ÅÖÖvÉ’%C† 9yò¤Ô—™™yòäÉœœœ.SG\\ܺuë,X`ccsMs7î‡~èÜîïïßÚÚZ\\,ÛØATT”͇~˜žž>wî\)¼I¯Å~õÕWg~S]]}×]wI§èÞn1cÆçŸ^SS³páÂûï¿¿CêóõõmmmÕ~vXªÄ××÷š–ÜAmmíùóç{ì1é[ÊK—.mnn>räˆnooï¼¼KAAÁ¸qã:4^Ïr‚‚‚üýýß}÷ÝíFÿ°Ú«ÝÝeÞ¼yšß899u9û–-[î¸ãŽ.ÿ)ÄÎÎ.00P»+M§û'îpnw×öú¯y_E@¯wéÒ¥åË—çåå544ÔÕÕmݺõüùóÚßž¹ë®»\]]gΜyë­·†„„t>=&&æÀ«V­ê|¨­­íªF£{tÆŒºwç´üüü¦OŸ_^^®ÑhŠŠŠŠ‹‹»lìp¢••Õ‚ ^yå•={öHÏ?K£EGGÇÆÆJ÷!¯\¹²k×®ÖÖÖç}öÙgW¯^µ±±qssS(VVVº&Nœ¸lÙ²ºººÚÚÚ¤¤¤iÓ¦IôBºÒ×¥«!„puu ܲe‹Z­nnn~ýõ×[ZZ¤ •••%ÝxB$$$¤¤¤¨TªÚÚÚ•+WFFFJ1O·O—ãK>ûì³3ft¨çz–£P(¶lÙ²mÛ¶åË—666ž9sæé§ŸÎÈÈ0pXÿ(Õ××gdd$$$t×áÑGMKKûþûï›››7nÜØÒÒ2eÊ=çvwm»kïRhÇoQ›Ü¸ëú‡—ë éë,a®¡¡!>>>00ÐÎÎÎÑÑ1,,lÇŽºÖ­['„HOO×m¼ë®»ž}öÙC}øá‡J¥RÛ¡Ãÿxþþûïu;···5*''GÚ?~ü›o¾)m«ÕêG}Ô×××ÁÁAúàsw !F­ÛX__Ÿ””ààà0dÈ ´¶¶j4šÐÐÐíÛ·K}N:5~üø4hܸqÿïÿý¿ÎUUUUÍ;wðàÁîîîóçÏ¿té’Ô®;NSS“âìÙ³ Óý½(‰tú™3g¦OŸîìììààð‡?üaïÞ½Rÿ%K–ÜqÇÒvkkëòåË]\\ìíí£££/\¸Ð¹OwãŸ:uÊËËë×_í|­®g9æØ±c÷Þ{¯³³³­­m```rr²4‚!Ãêù£t {ý5Í?ÿùOOOÏ–––Î=%íííÏ?ÿ¼§§§ƒƒCxxø·ß~«ÿÜî®mwí½”±bBóŸÿˆÕ÷(}`™]<Æø4o˜¥–íÓO?}á…>ÜÓ…ôY117<ð: IDAT13gμ¦S£ï1V¬ëûá } a¬XÇ;À‹@X0À"€ °`€E ,`À‹@X„ Àmmm+V¬pssS*•111ÕÕÕ†÷qrrRü§ŠŠ ó–èez,oذ!333''§´´´¹¹yîܹ†÷©®®núÍ#<2qâDó–èe¦G&öññINN~üñÇ…………ÁÁÁgΜ ¼¦>ÞÞÞ¯½öÚC=ÔÝD E­`RŠx™š7ÌR01cźž¹\SSSYY&íÙÛÛŸ:uêZûìܹÓÚÚzÖ¬Yú§ëð¼ôêÕ«¥ö¼¼Bˆwß}×ÑÑ1::ºJôB}ÿQ| ú*>‚€…èÝ?ƒ€™€ °`€E ,`À‹@X0À"€ °`€E ,`À‹@X0À"€ °`€E ,`À‹@X0À"€ °`€E ,`À‹@X0À"€ °`€E ,`À‹Ðc¸­­mÅŠnnnJ¥2&&¦ººúšú>|øü£³³ó¢E‹ÌX8À,bbdþpz,oذ!333''§´´´¹¹yîܹ†÷ùöÛo§M›6sæL•J•ŸŸÿàƒš·v@ï£Ðh4=2±Orròã?.„(,, >sæL`` !}&Ož}úÝwß]YY©g:ÅZ½zµÔž——§íSTTÔÐРÝÕ=¤»ÝÐÐPTTÔå!F`F`F0Å×ï÷° F`F`F`F0|„øøø!NIÏ<\SSãêêzäÈ‘ &H-J¥òí·ßž3gŽlŸ{ï½W©Tþõ¯•r¬F£š––öý÷ß777oܸ±¥¥eÊ”)=µ@¯`ÝS¯ZµJ­VGDD455EEEeffJíû÷ï/--‹‹ÓÓç±Ç«««»ë®»n¼ñÆO>ùÄÓÓ³§èúþû±¼ ½ï€ßôîw€030À"\[NMMåsS€ÞèÚð AƒüýýMS &tm_ž={öìÙ³MT 
à÷NöÃTBˆìlÓ×ðß¹|ìØ±ââbínqqññãÇM\Æ'€—,YÒÚÚªÝmmm5qIŸL.--1b„vwĈ%%%&. ã“ À®®®åååÚݲ²2ggg—€ñÉàèèèE‹ýøã¦¸¸xñâÅÑÑÑæ© #’ ÀÏ?ÿ¼ƒƒÃðáÃûõë7lذ¬_¿Þ<•`D2?ƒäèèøÁ”•••––úûûûùù™§,ŒKæ°¢½½]­V755ùùùµ¶¶¶µµ™¡,ŒK&Ÿ?þ¦›nŠˆˆX°`b÷îÝK–,1Ka“LNLLœ:uªZ­4h"**êàÁƒæ¨ £’yøÈ‘#{öì±²²’vkjjL_F&sX©TªÕjínYY™»»»‰KÀøä866¶¢¢BqñâÅÄÄÄY³f™¥0ŒI&¯_¿ÞÊÊÊ××·¨¨ÈÃãÿþÏ=÷œy*Àˆä8;;»¢¢¢¤¤dÈ!ü0 —’ ÀŸ†††ÆÆF{{{S×€ÑÉ<½|ùòÜÜ\!ÄG}äêêêììüÉ'Ÿ˜¥0ŒI&¿÷Þ{£GB<ÿüó¯¼òÊûï¿ÿ—¿üÅ,…`L2@ÿúë¯vvvuuu .´±±‰5Oe‘Lvrr:sæÌ‰'&L˜`ccÓÔÔdž²0.™¼téÒqãÆi4š;w !¾þúë   ³€1ÉàeË–MŸ>½_¿~C‡B 2dË–-f) c’ÿ¤‘#Gv¹ @/"óhú0À"€Á \UUuüøñªª*SW€‰ÈàË—/ß}÷Ý^^^ãÇ÷òòºçž{jkkÍSF$€—-[ÖÞÞ^PPÐÔÔtúôé¶¶¶eË–™§2ŒHæg8púôi!DppðŽ;BBBÌRÆ$sX£ÑX[ÿ_H¶¶¶Öh4&. ã“ À·ß~ûâÅ‹ËËË…ååå‹/¾ýöÛÍRÆ$€_zé¥Ë—/ûùùõë×ÏÏϯ¦¦fóæÍæ© #Ò÷pssóW_}uèСâââòòr__߀€³U€é»ܯ_¿¿ýíoBˆ€€€ÈÈHÒ/ ÷Ò€ …»»û… L1q[[ÛŠ+ÜÜÜ”JeLLLuuµá}:x* Kæà[n¹å¶ÛnÛ¸qãöíÛßûQ&Þ°aCfffNNNiiissóܹs¯©OBBBÓoüq£”èÃd~xÿþýJ¥2##C·qÞ¼y×?ñÖ­[“““¥_Þ´iSpp°J¥ 4°•••­­íõ—°28??ß³ÖÔÔTVV†……I»AAAööö§NÒ ÀúûdeeíÝ»wðàÁ÷Þ{orr²½½½)êô2@;v¬¸¸X»[\\|üøñ럵¾¾^1pà@m‹“““ÔhHŸÙ³gïÙ³çСCk׮ݹsç¢E‹ôO§øO«W¯–Úóòò´}ŠŠŠ´»º‡t·ŠŠŠº<ÄŒÀŒÐçG0z ×ïwx%AÏñññBœ0…F£Ñs8$$dÏž=#GŽÔV?kÖ¬ï¾ûî:g­©©quu=räÈ„ ¤¥RùöÛoÏ™3çšú!¾øâ‹;ï¼³¾¾^©Tv9—B!³F€¡bbäûdg›i®ìlE¼LÍÆ©ô,cÅ:™;À¥¥¥#FŒÐîŽ1¢¤¤äúguqqñööÖ&~•JÕØØz­}„ýû÷×h4mmm×_ “ À®®®åååÚݲ²2ggg£Lœ’’¢R©jkkW®\©}¹755UŸÔÔÔ’’’ººº£G.]ºtêÔ©ŽŽŽF© ÐWÉ|+::zÑ¢E©©©%%%þóŸ£££2ñªU«ÔjuDDDSSSTTTff¦Ô¾ÿþÒÒÒ¸¸8=}þõ¯=ûì³õõõžžžÑÑÑÚwz=Nö±dÁ“É ‡È‚èÕd°J¥:zô¨yªÀDdž0a‚J¥2O)˜ŽÌਨ¨™3gÆÇÇ{yyiçÍ›gâª02™œ••5`À€÷ß_·‘ èudp~~¾yêÀ¤dÞ o ,`À‹ ó,IuuuCCƒvwøðá&«“ À‡Z°`AYY™n£F£1eIŸÌ#Љ‰‰ëÖ­«­­mÒažÊ0"ùG çÏŸo†:0)™;Àžžž—.]2O)˜ŽÌà°°°ÈÈȸ¸8WWWmã¼yóL\F&€?þøãþýû§§§ë6€½ŽLÎÏÏ7O˜”Ì;Àô ]ßNKK NKKë|466ÖÄU`dÝ`'''0 Ïè:=z´Ã½ï,`À‹@XùÜÞÞþÝwßíß¿_ÑÚÚÚÖÖfúª02™|þüù›nº)""bÁ‚BˆÝ»w/Y²Ä,…`L2811qêÔ©jµzРABˆ¨¨¨ƒeâ¶¶¶+V¸¹¹)•ʘ˜˜êêêkí£Ñh&Ož¬P(òóóR “ ÀGŽY³f•••´ëìì\SSc”‰7lØ™™™““SZZÚÜÜfÉ’%/¼ð‚‹‹‹!Ó)þÓêÕ«¥ö¼¼Iæ°F£Ù·o_nn®îsÛFùôTBBBJJÊ”)SÜÜÜV®\(„ÈÊʺråJ\\\w}|}}'Mš$ réÒ¥?üá™™™ãÇ¿þ’}˜LNLLܵkWTTÔõ¿÷ÛÁªU«ÔjuDDDSSSTTTff¦Ô¾ÿþÒÒR)wÙÇÞÞÞÞÞþ«·¶B 
ø`xxøo¼aô``N2X¥R=zÔÎÎÎ<Õ`"2@O˜0A¥R™§LGæpTTÔÌ™3ããã½¼¼´óæÍ3qU™LÎÊÊ0`Àû￯ÛHô:28??ߣë|ôèÑôj2ïÏž=»CKTT”ÉŠÀTdðÉ“';´äææš¬L¥ÛŸAJOOB444H•Jåááaúª0²n𫯾*„¨««“6„7Üpƒ§§ç[o½e¦Ò0žn°ô¨ó“O>¹yóf3Ö€IȼLúô 2€¾ °`€Eèö#Xºª««´»Ã‡7Y=˜„L>tèЂ ÊÊÊt5)KÀødNLL\·n]mmm“óT€É?=þ|3Ô€IÉÜöôô¼té’yJÀtd…EFFÆÅŹººjçÍ›gâª02™üñÇ÷ïß?==]·‘ èudp~~¾yêÀ¤dÞ–TUU?~¼ªªÊÔÕ`"2øòåËwß}·——×øñã½¼¼î¹çžÚÚZóT€ÉàeË–µ··455>}º­­mÙ²eæ© #’yøÀ§OŸvqqBïØ±#$$Ä,…`L2X£ÑX[ÿ_kkkFcâ’Ðcñò}4o˜¾0™G o¿ýöÅ‹——— !ÊËË/^|ûí·›¥0ŒI&¿ôÒK—/_öóóëׯŸŸŸ_MMÍæÍ›ÍSF$󴇇ǡC‡Š‹‹ËËË}}}ÌSÆ%€%D_@¯Öõ#Ðiii?üðƒ´Ñ™Q&nkk[±b…›››R©Œ‰‰©®®6¼ÏóÏ??räH;;;WWט˜˜ââb£”èú¾œ––æääÜeܽþ‰7lØ™™™““ãîî¾pá¹sçæääØ'**jΜ9ƒ®««[³fͬY³Nœ8qý%ú°®ðÑ£G;lÝÖ­[“““¥_Þ´iSpp°J¥ 4¤Ï„ ¤ŽŽŽ¾¾¾»ví2Q‘€>Cæ+гgÏîÐuý³ÖÔÔTVV†……I»AAAööö§N2¼Off¦‡‡‡ƒƒÃ /¼°fÍšë/ зÉà“'OvhÉÍͽþYëëë…Ô¶899Iö¹ï¾ûNž<™““7nÜ8ýÓ)þÓêÕ«¥ö¼¼ÙÙ¦¯ú>™G ÏŸ?ÓM7EDD,X°@±{÷î%K–˜¥0ŒI&'&&N:U­V4HuðàAsÔ€QÉ<}äÈ‘={öhŸyvvv®©©1}U™Ì`¥R©V«µ»eeeîîî&. ã“ÿàØØØŠŠ !ÄÅ‹gÍše–Â0&™¼~ýz+++__ߢ¢"þýû?÷Üsæ© #’ÿàìììŠŠŠ’’’!C†ð;À`,Šxù>š7L_€Å ÀmmmVVV>>>>>>RËÏ?ÿìàà`úÂ0&™mžÊ0"™G “’’öìÙ3eÊ!ħŸ~ºnݺ[o½µ¾¾Þ,µ`428??ßÓÓSÚ¾á†þú׿þñ4}UY·@þùçíííRú­­­mooB´µµ•••™¯:Œ¤ÛÕÜÜ,m9ò§Ÿ~B´´´,Z´ÈL¥`<ò_  ,`ô}zÅŠVVVBˆ_~ùeõêÕmmmæ* cê6ßu×]gÏž•¶###+**´íæ¨ £ê6úé§æ¬“â`€EÐ÷0º£ˆ—ï£yÃôu F½GLŒL‡ìl³Ôè•zìè¶¶¶+V¸¹¹)•ʘ˜˜êêjÃû<ýôÓcÆŒ±··÷ññyôÑGëëëÍ[; ÷é±¼aÆÌÌÌœœœÒÒÒæææ¹sçÞçâÅ‹›7o...ÎÎÎþòË/üqóÖè}zìè­[·&''‡„„!6mÚ¬R© é³mÛ6©ƒ‡‡GbbâæÍ›Í_? 
wé™;À555•••aaaÒnPP½½ý©S§®µâ›o¾7nœþéÿiõêÕR{^^ž¶OQQQCCƒvW÷îvCCCQQQ—‡þ‹ tý5Hÿ*®u4ý \…q'úÿß#ÈŽ` #üw3&Fþ?ã¿Y]ÖÀŒÀŒÐÝñññBœ0…F£1ÖX†+)) øá‡‚‚‚¤ooï5kÖÄÆÆ^SŸ×_}õêÕÇ÷÷÷ïn.…¢gÖú™óû±F›Ë€ÿů,ÿ "ãÌ•-».>ÀÛ™í#X¿«ÿk @–@ IDATï¥s€ñ+ÖõÌ#ÐŽŽŽBˆºº:m‹Z­– ïóòË/ÿýïÏÉÉÑ“~ôÌ#Ð...ÞÞÞÚ[Þ*•ª±±144Ôð>k×®MII9xðàèÑ£ÍY9 —걯@'$$¤¤¤¨TªÚÚÚ•+WFFFJ_ÀÊÊÊJMMÕß'))é­·Þúì³Ï|}}¯^½ú믿öÔ*½E}zÕªUjµ:""¢©©)***33Sjß¿iii\\\w}~þùç””!ĨQ£¤Slll®^½ÚCëô=€­¬¬6mÚ´iÓ¦íiiiúû888ðQ+Àµê±G 0'0À"€ °`€E豯@˜‚"^¦ƒæ ³Ôøýá0À"€G ë#ß';ÛôuÁ`€Eà0¸>< è%ÀôE„R:áh€Eà0ÌÈœ·#dçâ¾`a¸ °ÜLN/ßGó†éë,øw‰/—€±ñ4àÿ³wïaU•iÿÀŸ-  ( Ѝx>1#Z™‰ "¢™šZšç”ÔÄêU'Q™“©“Zy@…$µ+zƒdp¦ÌC8cf:h–'1tsØX×ïõk¿{Íq¯uïýÜûû¹ú–]|y€}ïçYë98<vzxÚ ÎO€À)` NS ï¦ È `xÒ÷‰)Ðàð€éžÊRÂ`p €SÀh*8P$r?o[Í%{Ûœ–FÀÿ‡A)H„ã ”ÀàX~s\ŠA)<¬§€08Lp \w(À pËŠk @'˜ NO€~×I7\Ûp7x N`p €S°ÛàÚÚÚ¸¸¸­[·VWW<8!!! à>ÿŸO?ýtÆ 'Nœ¨©©1›Íöøö„BŒýÛÿÏöíúðÛì6^¾|yjjê¾}û¦M›6~üø}ûöÝçÿãëë;gΜ¢¢¢¹sçÚã{ø/`ßAa·pBB¢E‹BBB„«V­êÞ½{NNN—.]îçÿyòÉ'…;vì°ËwàXîç®ssÜuÖ~«î£ bìÁ žÊB}ì3...¾zõê£>ª~Ú­[7'NX€ïçÿf~óa)îVÀ³Ï&XBoooËõâõÿÜ'ÃZ¼x±zý‡~°ü?çλyó¦åSë²þø7‚fýö7oÞÿ&ïÿ/ _Á1¿Â}r¶W–¶YÚþ.î3è?IÛiòžuÿYõ~q‡}Ïú¯à•¥U#TZÛ9B+ðþ+¶¿²4o…íø½g‰ÿì3ßçOrÖ¬YuqZ}3EQ´úZ÷¯¸¸ØÏÏïßÿþwß¾}Õ+žžž[¶lyî¹çîÿÿÙ±cǨQ£~s,ƒA£6ÞÇ$ Íf× ËÆ ûÈr¨FÝoð¢ÙZ Ù^Åšemß®Í#‡j”giUÞñ‡á@Y”œu 4å²R‡ÊÒê¯]³)Ð\_ÅŽT1´ÖÙç °¯¯o«V­,#þœœœêêêž={þ·ÿÀ}²Û9À111ï½÷^NNNIIÉÂ… ÃÃÃÕŽ۶mûðÃïýÿÔÖÖÖÔÔüòË/Bˆššššš{µda·] ÿô§?•••………™L¦¨¨¨ÔÔTõúž={.]ºôâ‹/ÞãÿÙ²eËÌ™3Õ7n,„0~~~öhÀÿÁi7ŽÌn`—U«V­ZµªÎõ¤¤¤ßüf̘1cÆ Ý¿E€†£‰îcÁ6É÷ðì6€Àà0§€08»m‚Nî77LÆnÉ +lÙí„0€úÜÇ&ƆûØÄïà80Çýœ¬ƒ#%lƒ;ÁúÂùX`W87_N›`€SÀœÀà0§€M°À™üæ&Øá€/ €ÀÞpÀ€c ÀŽ&ÐÀ ߨcT0(6Á§€08 €À)` N`p €SÀœÀà0§€08 €À)` N`p €SÀœÀà0§€08 €À)` N`p €£[¼x±½¿pDøÃ€záC"øeA½ð‡ºÂÝÛo¿mïoþ0 ^øÃ~YP/üa€®0§ 帶¶vÁ‚þþþžžž£G¾~ýº½¿#ptR€—/_žššºoß¾K—.ýüóÏãÇ·÷wŽ®¡½¿‘°hÑ¢!ĪU«ºwïž““Ó¥K{_àÀÙ !þýï[®xxx¤¦¦Þíÿ·÷l¥ÉpR¾'ÀBoooËõb½0!ã`///!Dyy¹åJYY™zànäûúú¶jÕê‡~P?ÍÉÉ©®®îÙ³§}¿+ppg/[¶lóæÍ»víò÷÷ŽŽ.//ÿæ›oìýM€C“o °âOúSYYYXX˜ÉdŠŠŠJMMµ÷wŽNÊ'Àÿ-ùÖ< €À)¸,^¼ØÞߨ*//ïÔ©S...G‚±ÌbÙ(â,~rsskkk=<<åøY,Å8‹%®¿,¼Š‘e÷,ÍI¹ –ãèÛ·ï=þ533YޓŲQBˆÊÊÊiÓ¦¥§§ !E;vì–-[<==5ŒàŲQ4Y¡¡¡÷ø×ãÇKdmåÊ•qqq...ÿøÇ?†úùçŸΙ3Y˜Å²QŒ³X¾ïsýeáUŒ,»gé›`Ù$))éÿ:cÆ d9NËF 
!.\˜ýÑGýáøúë¯gÍšÕ¥K—Õ«WkÁ;‹e£h²’““Õ.^¼¸aÆèèèŽ;lÞ¼yôèÑï½÷žtAÖ:wî¼oß¾ãǯZµjß¾}.\>|xvv6²0‹e£g±|ßçúË«YvÏÒ…2kß¾ý•+WEéÒ¥‹¢(mÛ¶E–1Î ;zô¨åÓ7nDFFJ¤(J¿~ýE©­­mÓ¦¢(·oß~衇å˜Y,Å8‹%®¿,¼Š‘e÷,=`, äææ~ûí·¹¹¹Èrü,~ºqãFëÖ­-Ÿ6kÖìæÍ›ÈrÀ ÆY'Ož´ž¥ìïï_QQ!uÂËË«¸¸¸Aƒÿÿ.™““ãããƒ,ÇÌbÙ(ÆY*fïû\Yx#ËîYzÀØ&7nÜ8p`ûöí###Û·o?hÐ £Ñˆ,ÇÌbÙ(!D³fÍJKKÕkkk—,Yòøã#˃guèÐaãÆ–O322}× !ºuë¾fÍšªªª>ø`ĈãÆC–cf±lã,–ïû\Yx#ËîYzÀ`›Lœ8ñÖ­[|ðA›6m®\¹2gÎOOÏO?ýY˜Å²QBˆçŸ~ĈcÇŽ 0™LíÚµûÇ?þŒ,G bœuàÀaƵjÕJ]š{þüù¯¾ú*,,LÞ aµ}ŽÁ`ðóó8pàË/¿ìêêŠ,ÌbÙ(ÆY,ß÷¹þ²ð*F–ݳô€°MÎ;çíí­~ZVVÖµk×ÂÂBd9`ËF !jkkEiذáž={~÷»ß¹¸¸èÄ5‹e£ˆ³„F£1===???((èÙgŸõóó“=tÂò}$‚)Ð61 µµµ–Okkk ²3‹e£„ß~ûmÆ …C† uqq)((@–1ÎBøûû<8**jðàÁºJÉ‚Nžü+777õd9`Ë÷}T Y‚¾¯,®YzÀØ&F£ñÉ'Ÿ´ÜMxê©§Š‹‹‘å˜Y,¥(JVVV·n݆ RXX¸xñâ&Mš|òÉ'ÈrÀ ÆY&Lxæ™g._¾¬(ÊåË—GŽ9qâD©ƒEñöö6™LŠ¢DFF&''Ÿ9s&88˜AÖË/¿|íÚ5d9~ËFUVVŽ=ÚÕÕµS§N§OŸF–ÃqÍjÞ¼yYY™åÓÒÒÒ€€©ƒEiÚ´©¢(555žžžj¨··7ƒ¬Ö­[«D«.]º¤_×™,ËÇǧΕ:è„, 1{ßGÅ%HáûÊ⚥¬Ö@uuµÉdª®®F–ãg±lÔÑ£G333{ì±ÂÂÂï¿ÿYÄ5‹åоàààíÛ·üñÇ!!!ÞÞÞUUUnnn ²JKKƒ‚‚,Ÿ¶iÓ¦¨¨Hö¬Gy¤Î•^½zé„, 1{ßGÅ%Hð}eqÍÒ…½GàRÚ±cÇÑ£GE¹råŠz´¦§§§",,,??Y•ŲQÖ–.]Ú¤I“¤¤$EQþùÏè·Æ‰eËFg±\Ñ÷å—_º¹¹¹ºº~ñÅŠ¢>|øÕW_e’““cù4''§G ².^¼h4uúâÈÒË÷} T Y‚Tœ^YÎ¥9 €Dpp°ú*}òÉ'§NªþúFãÔ©S‡Š,‡ÊbÙ(k]»v=uê”åÓk×®…‡‡#˃g±\ѧ(JUU•~»°Ø+ëðáÃ.\°|záÂ…ÌÌLÙ³V¬XÑ AƒFíܹSQ”ÔÔÔ5kÖè„,[°|ß·†Š!EÂî•Å>K?ËZ ooïêêjËõêêjÍ—a K– â,‹ÊÊÊ:WÌf3²0ˆq–ŠÙŠ>Þ.^¼¸ÿþ‹/òÈêÔ©S~~þŽ;  (Êùóç»víŠ,GËbù¾ï$È*M³Wû,=`ü ‚‚‚®^½ª(J§NŠŠŠ,×Fc§NåPY,àÈJKKÿö·¿ÉôX}EÙ¶m›ú¤Y”GI‘eõë×OQ”ÚÚÚ6mÚ(Šrûöí‡zH dÙ‚åû¾*†,A »Wû,=4Ôs}1[#FŒxî¹ç–-[öꫯŽ?þÍ7ßlÛ¶íåË——,Y²hÑ"d9TËF !BCCïñ¯ÇG–ƒ1βvëÖ­ƒîÝ»wïÞ½?þøcûöíŸy橃f̘QïõîÝ»ßퟤȚ7o^Ó¦M/_¾Ü¦M›+W®Ì™3gÞ¼yŸ~ú©¶)ÄY^^^ÅÅž¾¾ê§999>>>š§ ËF,ß÷-P1d ì^Yì³ta︔***fÏž}·ß4²*‹e£EùäWo¿ýv@@@\\\RRÒÒ¥KÛµk§ùf,³X6Š8Ë"**ªqãÆ&Løè£Ôsz¥bŒò()²¬?þñ=zôX½zµ¯¯ïûï¿ß±cÇùóçë„,[°|ßgå)wÌ^Yì³ô`P¥Þ÷ãæÍ›·nݪsÑÏÏY˜Å²QBˆþýû¯Y³æá‡V?5&LØ»w/²-ˆqV@@@MMÍÔ©S£¢¢ ФI=R(ƒkѢř3gš5k¦~Z\\RPP uVß¾}Õ ƒŸŸßÀ_~ùeWWW̓¥ –ïû\‘½Š)K×W×,=` 7//¯ÒÒRË•>}ú9rYŽÄ8KQ”“'OîÝ»7###33³Gƒ zûí·å w™L®Órʬ矾¨¨è½÷Þk×®ÝÅ‹_}õÕ-ZlݺUö,ûBÅ%@ÑÀÞß8¢¤¤¤ÚÚÚ:wïÞ­Çb ²,–BtèÐaãÆ–O322ô»«Å2‹e£ˆ³ CÏž=çÏŸ¿uëÖ>øÀh4.Y²Dê !D¬•©S§þüóÏÆ cõþûï+ŠÒ½{÷Æ÷èÑÃÅÅåý÷ßg`_¨²<†z 
“Éäîîn}155uíÚµ”4‹e£„6lX«V­:vìXPPpþüù¯¾ú*,,LÛÆY,EœµsçNuWª«W¯FDDDEEEEEµoß^Þ ;Ɖ'fddðÈ*((ÈËËkݺuË–-õK!Ëâúðk–3@Åpä ®¯,®YzÀ.ÐP¿]»v5jÔÈúJQQщ'¤ÎbÙ¨'žxâÂ… éééùùùÇöÙgõ[àÄ2‹e£ˆ³† æååµpáÂyóæyzzê”Bt'V£gú裸¸¸C‡9sæÐ¡C .LLLüÓŸþ$]–å°øgžy¦°°pèСÿó?ÿ3fÌ˜ŠŠŠÈÈHM"ì’e±gÏž.]ºtýO]ºtÙ³gÔYnnnêz’#GŽ˜ÍæÕ«W?òÈ#š¯Ë% pd]LÊ,~š4ib}Öˆê…^ˆ‹‹“:‹¬?CÙqbŒìUŒra#®CöÚ~š‡ÌÌL—™3gîÛ·ïܹsûöí›>}º‹‹ËáÇ¥Îòõõ={öl‹§Nòóó“7+,,ìèÑ£–Ooܸ©m„]²„¿üòK‹¿üò‹¯nÊ,–ŒF#Ë,Ѓ¢M›6Ë–-³¾˜‘‘¡Ç™7”YdýÊŽ€Ýq­zÀ`›<öØc)))sçÎÝ´i“z%000%%¥OŸ>Rg™L&ÿ:ª««åÍ:yò¤õeýŽi¡Ìâº%–«ëÓCM¸f\(·è#Ë"ëÏPvœP1À°¬zÀØVcÆŒyæ™grrrJJJš5kÖ¥K— ôšXN–½bÅŠ.]º¨W²³³ccc,oV‡6nÜhy•fdd(º-€§ÌjÙ²ennn5N¹¹¹-[¶”:‹%N«ë­±<Ô„kÈ…r‹>Ê,²þ Y*8®CsØ êqýúõqãÆíß¿ßÓÓÓÛÛ»¼¼¼ªª*<<<55Uó}ÞȲ80lذV­ZuìØ±  àüùó_}õUXX˜†vÉš8qbPPЊ+¬/Ο?ÿúõëÉÉÉòf±´`Á‚´´´ØØØÞ½{ûøø”••=ztåÊ•ãǯóS•+Ë`0Ü95Àl67jÔHÛ·² ÆY,q}øf—-ú²XBÅ*†\YzÀØ&\_Bª³gÏž8qB<Ù³gÏ®]»ê‘B™e4ÓÓÓóó󃂂ž}öY]+"Ë:|øpXXØ /¼0qâÄV­Z]½z599yëÖ­‡Ò|–eK~~~´LvPeeeEDDFy³ÜÝÝKKKëœ X]]íëëk2™d bœÅòmË`0Üí˜c²,ww÷òòrõ4ÔK—.ÍŸ?ÿرcýû÷×|¯ Ê,–‹&P1d ¨²eé`›Ì›7Oý ººúÓO?9s¦ÐíÏš2 ä’––6wîÜk×®©Ÿ®Y³f̘1²gñãééyç!£Ñ\UU%oËCM¸f±ìùáá›\Èú3”'T Y‚*`¬•ÂÂÂÐÐÐÂÂB¡ÿŸ5eÈâöíÛ4 ÑɲXÞu5j”¢(w®xwuuݾ}»¼Ydsã¹Nø§ÌbÙóãúð=²þ A*†,A° –V<<<,»WUUyxxðÈY4hР[·nœ²Xî«”˜˜8nܸ®]»ÖYñ¾yóf©³æÎVVVvçÜxIƒg±Ü7{JЬ?C„Š!K@ÅÓ;µœœ¬î¼Ê& Ü»“:KP9Ld‘’’òᇦ¤¤dggë”BœµmÛ¶ÀÀ@Ë;K``à¶mÛ¤âš|úôé:³²²Úµk'oÖ„ ,XPç⫯¾:iÒ$mƒˆ³œY† CŠ S 5³~ýúW^yÅÅÅÅ`0¤¤¤<ûì³<²ÀÁq]ˆÎr_%ÞÈæáó›ðO™År¢&ö”Y†&Ãñƒ*` °¶Nœ8‘Ý«W¯:;¯J—År›P–²Æl!:Ë}•ìŽkÏ{Ê‹¬ïDÙIcƒìUL}º‹Ʀe IDAT‹ËáÇåÍbÙ(ÆÈÎfy¸1q–µøøx!„‹‹KÆ ÓÒÒqÍ*((P?Öûƒ,«¼¼¼I“&êÇeee:Ñdq=­”¬ïDÙq²†Š!E‚Š!I–0¶‰Âd2Õ¹˜’’Ò¯_?=²Ú´i³lÙ2ë‹!!!šg)LÏsgÙ(®æþjæÌ™êÇsæÌÑü½,HQ77·êêê:«ªªÜÝݥΪãøñãŸ}öÙÙ³gÙ±ÌbÖó³HNN®­­]¾|yhh¨~AY\ïÛ’õˆ;NÖP1?H…ŠáøYzÀØ&Bˆôôô/ÿS||¼§§§YÙÙÙ­[·^·nåbAAAãÆ5ÏRÕÖÖž9sæ»ï¾;sæLmm­N)ÄY,ŧ»ÎÁÁÁ§OŸ®s1++«]»vRg¤8õü,˜=|cyß–¬ïDßqâìULYšP1¤ÈÒÀ6QÿŽë¥G–ÉdÊÉÉiѢņ Ô‹GŽiÕª•æYõºyó&}(§»Î˜ž…YÏÏ‚ÙÃ7~÷mÉúNè8i‹å C’,má$› “ÉD³ã™%+++kàÀaaa  ذaƒ¶Y,· eÙ(!DRRRtt´‹‹K”âââI“&i›e0æÏŸ¿jÕ*ë‹/¼ðBóæÍÿú׿j›eì|c6‡g¼Nœ8A³oþ¼:éÔd2ü™mO¤w–ÑhÔãË‚®˜MO5!k”BÛ.–5Õ`‹)))ýúõÓ6HÍjӦͲeˬ/fdd„„„hžeÓ™Ud}'ÊŽ*†ÈÊ…‚Š!s–V0¶ e jÛ¶í¿ÿýoõã/¿ü²W¯^ ÕËÅT”xnL“UogB– S§Nùûû?÷ÜskÖ¬éÖ­ÛìÙ³õË"`÷vÉÞóB¤§§ùŸâãã===µ 
Rho|×Áæ¾-Y߉²ã„Ša{Ù@Cê,M`ìdÊ4gΜ^½z}ûí·{÷îíܹó›o¾©` ´7M£¢¢Õ÷îÝ;ËŠæY,ï:[ HNN®­­]¾|yhh¨ÔYd=?77·ššõãÜÜÜgžy¦]»vS¦L)//×<‹²;KÜ.~=?®߸"ë;QvœP1l"(¢bÆN6¡¬A%%%C‡5 ƒa̘1•••Š¢˜Íæ>ø@ó,åMS??¿‹/ª÷ÝwÍ›7ùå—ÕÝD4Ïby×Ù§éI,çá³l”´çÇõáWd}'ÊŽ*†íA”+\P1œÎ–LEEEƒ pþXPžÍëæævãÆ ooo!Ä… ÂÃÃóóókjjÔ´Úf‘oì ‡ëÅòPJ–B “ÉäîîN™•••5pàÀˆˆˆ°°°„„„h»5.e£ÜÝÝËËËÝÜÜ„—.]š?þ±cÇú÷ïïååEð À‹¦ã„Š¡UÞåB b€k ¸¸¸iÓ¦ m©]SSSUUåííݰaCšD]±|Ï l”Zǯ\¹þç?ÿyöìÙBˆï¿ÿ~ôèÑùùùÚf§¤¤ôíÛW±cÇŽ?ÿùÏ?þø£~`—zÿIÛž(€\ †ùóç¯ZµÊúâ /¼Ð¼ysmw«¬¬´œº©G–¢(ÕÕÕ^^^Š¢\¾|ÙÝÝ]ó,Át›P~²(**ª­­Õé‹ßÉd2ýòË/úE°¼ëL‰òH6Ü.“ðù-.à×óãúðë}[²¾qÇ ÃÄET Û³¤~Òƒ°Mz÷î½qãFEQÖ­[×¶mÛ¹sçž={vîܹøÃ4Ïêׯ߻ᆱ(ÊíÛ·ßyçÇ\Q”Ë—/7mÚTó,²Ù§ á{ËF)|¢³¼ëÌuzËCM/.à×ócüðå}[²¾eÇ ÃFÄET Û³¤>ß`›¤§§7lØÐÓÓÓÇÇ'++KÝ×ßßÿðáÚg]aÙ(…õBt–wYNOby¨ ×Å\{~İ &ÈúNA¨6b\.T ÇcPE€Í ºŸ$AËóÜY6JñðÃÏœ93&&&>>~ÕªU£Fš={öÆ;¶ÿ~m³ÂÂÂF޹páBEQ–/_¾cÇŽC‡]¹r%44´¤¤DÛ,ƒÁ`2™ÜÝݳ²²–0`À€ 6h4wîܬY³æçŸ~饗Æ·dÉ’ššõI©†AªQÚ~Ù{géýBŒ1ÂÇÇ祗^Z·nݵkׄcÆŒIII üüóÏ5 zçw6nÜ8cÆ !DRRÒ¸qãnݺµuëÖÝ»w÷ïß_à AØ(AÛ.z 4x衇ô*..nÚ´iƒ ôRÕÔÔTUUy{{7lØP§ˆÊÊJ‚Ÿ›‘õ‚P14AV.*†“³ãà›ÊŸ$A–`ºM(¿F)|¢³¼ë,˜NOby¨ ×Å*f5¹>|Ϭ²FÖw"BÅÐ Á¼nT ÛÙ¥bh`mð³Ü&”_£T,¢Ãô$[°<Ô„ëâ–=?ìh;Êû¶œÀ¨¶#(¢bØÎ.CCr|—ޝ´´”S–à»M(³FYc¶]Åì®3×…è,5ázÊËžׇoŒïÛªÈúNA¨6¢(¢bØÎ.CCr|—@LpÜ&”e£¬1›†Àò®3×éI,5Ẹ€eÏëÃ7ö÷m9AŰå@Ãv²W 9¾K‡ÕóžäÍb¹M(ËFYc6fy×™ñô$–‡š°\\À²çÇõá×û¶dýÊŽ“‚Š¡Q¢ÿ@ÃvvyÒ£!9¾K‡µxñâ–-[¾þúë›6mzýõ×[´h±xñâO~%o=š˜ÄˆÅlÌò®3ûéIœ5ášÅ²çÇõá×û¶dý»tœP1 å@Ãv²[…°MÂÂÂŽ;fùô‡~ c¥b¶M¨Še£TÌ¢³¼ëÌ~z§-m¸fqíù±|øFæ¾-Y†¾ã¤ b<(Êr¡ bhDÞÇWÛ¤I“&f³Ùò©ÙlÖc}ËmBY6Š1–wÙOOÂXŠ,Æ=?fßTüîÛ’õg(;N¨Œ~ ˆŠa;Ê'=ÂØ&¡¡¡ –Oׯ_¯>f‘=‹å6¡,¥ð]ˆÎò®3ûéIK”ŲçÇì—Åõ¾-Y†²ãdÁìЂå f¿,®CEQ<¨ 6¬mÛ¶ÁÁÁ¹¹¹yyy;wî “=Ë××÷üùóM›65Í›7¿zõj``àµk×~ÿûßi›ÕªU«Ý»wÿîw¿B;vlêÔ©'OžüàƒÒÓÓ8 aËF !Þ~ûíÄÄÄ^x!88øÒ¥K}ôQLLL‡Ô;;[Q”F=ñÄùË_{ì1̓BCCïñ¯Ç—.ˆq–³G*fßš4iR\\ìêêZSSÓ¸qãÊÊJOOϪªª–-[j^x-YB“ÉÔ¢E‹òòò+W®téÒE½k ÷†Š!]@ÅÐ"KÈY1´ŸLèTÔu‰udff¦¥¥­Zµ*33SÒ¬7Þxã¹çž‹mÔ¨Ñwß}7cÆŒ5kÖøûûïØ±CÃÕÊ•+‡ºråJ!Ä­[·vîÜ)„8{öìÈ‘#µ bÙ(!D‡’’’fÍš¥~š˜˜Ø±cGÍSè³XŠ}ôÑG·lÙ²nݺéÓ§ !fÏž’’2gΚéI‘‘‘zLO=zô=æÆËÄ8Ë¢´´Tׯo—,²¾,MVçÎ7oÞ“””Ô¶mÛ×_]½oÛ«W/ͳ~ÿû߯^½Z½oûÁôèÑC½Þ¸qcͳÈú3”'T é‚*† (+†ðØ&IIIw^œ1cÆ™3g:¤v=eÌ„Ë/á L–âºÝ‚Ó]g® Ñû÷ïoy(ñã?Ι3ç»ï¾Ó6…2ˆqK\¾mß¾ý¹çžsss³Ü·ÍÌÌTïÛöéÓGà 
!Ä¡C‡†ª>ÏQïÛöïßÿ믿þì³Ï¶lÙ¢mY†²ã„Š!T ÛQV =` ÷’œœ¬ùÎFvÏâ×(£Ñ˜žžžŸŸôì³ÏúùùñÈRqÚW‰ëô$²¹ñ\'üSf±ìùa/@M0Þ*†,AC#rW Ú%ÇüÍ;—Så_YËFÈ7VhÏ7fy¨ ׬ŋ·lÙòõ×_ß´iÓ믿ޢE‹Å‹ò+I³° ¤Yd}'ý‚P1d RP1¤ÍÒžÛäСCï¾ûnAAÁíÛ·Õ+YYY!!!Bˆ£GÊ›eÁr—fâºå]g®Ó“XjÂ5‹åDM®ß,˜½m‘õg(;N¨² T i³4$å7í8ºuëöôÓOwíÚU=Z±hÑ¢wß}W1mÚ4y³,X¾„˜5ŠëBt²YC˜ž¤ ²¹ñ\'ü“e±ìùõêÕ+&&Ʋ?߆ ’’’~üñG̓ˆ³,˜½m‘õgˆ;N¨R T i³´DüÄ™Ÿ:W:tèÀ Ë¢´´Tïú,–â‡lÖ¦'Sa9QóÛo¿mÒ¤IHHÈðáÃ{ôèáååõÝwßéDœeAÙ[#È"ëÏØ¥ãÄ Ë.¨’fiHÎQ»ÃˆŠŠÊÈȰ¾2vìØmÛ¶ÉžR›7oÞêÕ«eÏby×Ù‚ÙÝY–‡špÍâ:Q“åÃ7 f§•’õg(;N¨²©P1dÌÒÀP–Û„²l”à»lÖ¦'ÙŽå¡&\³÷ž€] bH€0Ôƒåñ,%ø.Dçz×YÅl `_\¾q½o `_¨reéA—W@v;wî´üq?óÌ3sæÌyë­·¤ÎbÙ(!Daa¡:µX¶l™Nû¢Qf=ñÄ.\Po>\¿›ÁdAÖJKKõްK–Ù<|þ)³XöüîöÈ«{÷îš? £Ì=zô=î¥Ê›å$P13H bÈ–¥<€z°Ü&”e£¢ƒ`y¨ ×,Æ5ùayfW¨²1†Šqÿ0†z°Ü že£cy×™ëô$–‡špÍr<¾q½oË*†ÔP19KC=XnʲQŒ±¼ëÌu!zÓ¦MëL´îرãùóç5Œ bœu'=?®ßpßV"¨¡bÈ•¥ €¡~,· eÙ(×éI,5ášÅ²çÇõáîÛJC– Š![–090¸ëŒéI`w,{~Œ¾á¾-ØË.¨Òei`¨ËmBY6Š1–w1= ìŽeÏëÃ7ƪ««ÓÓÓÃÂÂÚµkÇ#ˆ+–+\P1Ç Ù$)))::ÚòŒEµ{÷îâââI“&É›Årƒx–blúôéO?ýô¨Q£¬o¿òÊ+ò !Ö®];lذøøxë)CzgDyä‘:Wzõê%{Vþ¥B¿þ%eãû¶%%%Ó§O÷óó{íµ×^zé%KùÕYWd¯bÊÒ„Š!W–ðØ&ƒÁd2¹»»[_LMM]»víÁƒåͰ;–w¦'À}c|fU~~~hhè‰'fÏž}óæÍ>ú¨}ûöÚFØ㊡9 €mb0ÒÓÓ5jd}ñòåË‹-ª¬¬”7ëN ¶ µcq,÷Uñ븴¨¨HñÙgŸ½öÚk±±±z<¡% ‰`lƒÁPgN²…Ùl–7‹å6¡,rÁô$°û¶………ƒ :}ú´z¥¨¨hÞ¼yµµµûöí“1À1ñ¨zÀ`[UVVÖ™–Ì ‹rU$Ë¥ž”Y\¢³„…èpÿê½—ªfFsßV§¬–-[Öù@'dAŽ€kÅОۤÞu¹ ²XnʲQ јbyf•âøñã7nÜ7nÜa-G—Ëà¸V =à °MŒF#Ùã_Ê,–Û„²l”j×®]u‡8qBö,g€éIp7………j‡ÒbÙ²e:u.)³BCCËËËçÌ™£÷(”,HEyäŽw‚;q­zÀ`¹q]ˆÎ×…èdsã¹NøÇâ¨N+•K~~~‡hŽ\¢ÌY bÜ? €m2cÆŒ!C†Œ;–YH„ë<|–¸NO"›ÏuÂ?€S!ëÏwœ(\ÂñN6Âãz'‡)Ð6¹téR¥ž<²@Œ§'‘Íç:á‹ lÇõr!ëÏØ¥ãÔªU«/¾øâ³Ï>‹ŒŒÔûÈ%š,²ñå@±¤¤dúôé4Ð)³à~) 3£ÑÈ2‹¥AƒÕ¹2fÌYB—»4ˆqÖ¦M›Ìfs‹»víJNN–7Ka2™ê\LIIéׯŸ¶AÄY ‘‚‚‚ü±Y³f¿:uêTdd䀤ÎR%//ÏÕÕ5000>>þöíÛzD©Y¾¾¾ùùù#FŒ0`À… xdÁ}Âhm”””TTTxyy5kÖŒSÀo"›Ïu¿ݳdŸÚm0ÒÓÓëûìµ×^Óï:eÜ'L¶ImmíòåËóóóÕ+AAA111qqqwÛ+HŠ,×…è,÷Up,§vSnÍ8lDÖŸ¡ì8;vìnG.iŽ2Ë‚lj7APaa¡:£­°°Pñå—_Λ7/--Mó)eÜ? 
€m—––×»woŸ²²²£G®\¹²¼¼|ÅŠòfD¸.DŸ9sæäÉ“ëôQÊËË7lØ í¸”,ˆ1²CÚ¸žÝZê â,®Èú3è8ÙŽlº5å¼îãÇßíºæ÷(³àþá °ML&“¿¿‹ÕÕÕRgŒø-D'{v„ 6by¨ ×,–?®g‰¬?CÙqâZ1Ȧ[SÎëæú¸î_{r‹ŠŠŠŽŽÎÉɱ\ÉÎΞ6mÚàÁƒ¥ÎJJJª­­­sq÷îÝŸ~ú©¼Y,%„¨­­]¶lYëÖ­}}}ÛµkçëëÛºuëwÞyçÎo@®,!Dee¥¹>òqÅòP®Y,ÇŠ~~~)ôY\‘õg(;N\+Fhhèc=¦Žßê4HåííýöÛoëñ•í›U]]œœœ››Ë,K{öÚ~š‡ÂÂÂððp!„§§g`` §§§"<<¼°°Pê,ÁñH –R%666888>>þСCgΜ9tèÐÚµk[·n½`Á©³êýê,t2}úôÏ?ÿœ_Ë3«Âþ eÇ $µb(äGI‘eiS mðÍ7ßœ={öĉêŒÐž={víÚUö,Át›P–Ú²eK5N?þxDDDDD„æ›|Pf±ÜW‰=N‡špÍb9Q“ëÃ7Ê-ú(³Èú3Ä'*†ƒ ¾CÕ¤I“#GŽÌž=;--M×£¤ˆ³4fï8Ÿ|ò §,!„Ë]țŲQŠ¢xxx×¹xãÆ ©³@"f³yéÒ¥AAA–w–   eË–Ýù@I– ÆY‘‘‘‰‰‰šY»g±$„HOOÿò?ÅÇÇ{zzJeAÖw"BÅ%ˆ·¼¼<___õãÿýßÿUgíéôx–2KskƒòVA– œJ–ŲQŠ¢Œ9òé§Ÿ>{ö¬åÊ™3gžzê©Q£FIE6kmG67žë„Ê,ÞŠ‹‹sssï¼O'i×û¶Ö¡ú}qâ T 1«?þøc³fÍ ~uêÔ©ÈÈÈH¥ €µ°ãg±l”Âw!:Ë»Îõþa0Xˆîëëk}[DuêÔ)???IƒgYpêùq}øÆõmË:”M*†DAŒ+Ù„_Ê,=` 0Ôƒå6¡,%ø.Dß»w¯_ÖŽA*– ÑYjÂ5«¶¶vùò剉‰ùùùê•   ˜˜˜¸¸8—ÿ\ÿ)QV\\\ZZZ\\\ïÞ½}||ÊÊÊŽ=ºråÊòòrÍ·' Ì‰ bÈ$øV Ê£¤(³taï8¥¥¥,³@"Ì¢[pºë,˜Nh$›ÏuÂ?eˉš\¾FÍ¿¦#dYõg‚P1d RøV EQÊÊÊþüç?ëñ•훥9 €¡,7ˆgÙ(k”÷³²Xî«$˜Nhdy¨ ×,–=?ìv‡Š!K‚Š˜m£üüü×^{íûï¿¿yó¦õEÙ³XnϲQŒ‘Í„FÛ±<Ô„kˉšQQQÑÑÑ+V¬°Ò–;xð`mƒˆ³XžY%û3”'T Y‚*†lYº°÷\n3fÌøâ‹/2¬0ÈQ¾œ ²XÞuf?¡‘Ó¡&\³XNÔäúðë™Udý»tœP1nܸýû÷{zzz{{———WUU…‡‡§¦¦È›%„ y FŸÅYÆ.'T ²@ÅpfÛäñÇ?pà@Æ3É)³XnʲQŒ©7}ïœ5äêêº}ûvƒ„LOÂX–,®=¿äääÉ“'ë÷õí•Åì¾-Y†²ãdŠ!E C®,-Ñ?tædõêÕ“'O>~üxžY,· eÙ(EQòòò¦L™Òµk×VV4O¡Ïb¹¯ûéIdï)”o^\³˜MÔT1ûeq=­”¬?CÙq²`öGhÁr… ³_׊¡<¶‰Á`¸ó¢N?RÊ,??¿ƒZˆ©²²²"""ŒF£¤Y,%„8p`‡ž~úéÆ[.4HÛú,×»Î\‘ÍÃg6áß.Y,ŸS1kÔ‚ ÒÒÒbccëlÑ7~üxÍ·è£Ì"ëÏPvœ,P1d â—ŵbè`›”••ÝyQ§bD™E¹*’åROÊ,ö ÑÉf az8f=?â š,®÷mÉú3”'öð*vü,®C ìý Èͧ> ²ÔMÛsrr,W²³³§M›¦ßñY,%„ðòò2›ÍšY»gYL™2…MPmmí²eËZ·níëëÛ®];__ßÖ­[¿óÎ;µµµRgåçç?ÿüóݺu ²¢y eã,Þ(ÏŸ#Èbyf• ìÏPvœP1d„Š!E–ðØ&}ûö½Ç¿fffJšÅr›P–B¬Y³æèÑ£±±±¾¾¾–‹:½RfYpºëÌuzÙÜx®þé¾59¡Ü¢2‹¬?CÙqBÅ.ˆ®CÛ$))éÿ:cÆ I³T,· å×(® Ñ­CÙ €¹NOby¨ ×,–òóó_{íµï¿ÿþæÍ›ÖeÏâzß–¬?CÙqBÅ*†\YzÀî…åñÌÅu!ºu(›»Î\¢³<Ô„kËžï‡oüîÛ²„Š!K@Å3K[ý0Û€8ˆ8 ×éIdsã¹Nø§ÌbÙócÿðÙ}[–P1d ¨Òfi½óz=~ü¸¤Ywb9VdÖ(® ÑYÞuæ:=‰å¡&\³Xöü¸>|³àñ¶EÖŸ±KÇ C– Š!m–†HlÄÆÆª\¼xqÆ ÑÑÑ;v,((ؼyóèÑ£åÍé±üÛ²žþù:¬X±Âúf°ÔABˆ€€€o¾ù†fÊeÙFšÌvì´K–º—;MoŒ,küøñÑÑÑ4Ä(³˜!ëÏØ¥ã„Š!K@Å<¶Qÿþý׬YóðëŸÆ &ìÝ»Wö, –Û„²l?,ï:[`zØ 
Ë‰š\¾Y‡rzžCÖŸ±Klj–+\P1$ÍҔߴãðòò*--uqq±\éÓ§Ï‘#Gdϰ;–û*Y0{sby¨ ×,–=?ì(WY†²ã„Š!K@Å6KC˜m“:lܸñ•W^Q?ÍÈÈЯ£I™År›Pfb¿lÖ¦'ÙŽln<× ÿ”Y,'jRv¿ìÒÕcÖ@²þ eÇ C– ÁîeßPG¿O€mtàÀaƵjÕJ]^rþüù¯¾ú*,,Lö,–Û„2kTrr²úA½kœÞ{ï=I³,XÞu¶åôÀ¾¸>|cvßÖ‚¬?CÙq‰ bÈ•¥tŒle4ÓÓÓóó󃂂ž}öY???Y,· eÙ(Áw!:Ù¬!LO§Â²ç—””tÕöqe³û¶ÖÈú3”4–X®pAÅ+K˜m““'Oº»»Ïš5K‘››[ZZª_m¥Ìb¹M(ËF !Nž>>õ.)”+‹åñ,¥b¹%® ÑYjÂ,‹eÏ{JšEÖŸ¡ì8¡b8x@Å6K²>¹vÁÁÁÛ·o7!!!ÞÞÞUUUnnn ²x,P´Wq×…è,qžÄòPfY,'jNž*uc}ûöýúë¯EÙ¾}»«««»»ûºuë¤bœõí·ß6iÒ¤k׮Çøá‡½½½¿ûî;Y7nÜHHHxã7ŒF£N)ôY T Y‚T¨NO€áÿ°Ü&”e£¬q]ˆÎò®3×éI,5áš%8NÔäúð $‚Š!Q*` ÿ‡åñ,UË…è,÷Uâ:=‰å¡&\³Xöü° Y¼¡bÈ$P1$ÉÒ—}@ƒc ;zô¨åÓ7nDFFʞŲQ¼‘ÍÂô$‘Íç:áŸ2‹åDMooo“ɤ(Jdddrrò™3g‚ƒƒõ¢ÉúäWo¿ýv@@@\\\RRÒÒ¥KÛµk÷ꫯʛÅ*†,A *†$YºÂøÁõêÕ«í=Iš¥Ð®Šd¹ÔËJmwâĉœœõã‹/ž;wNö ƾüòK777WW×/¾øBQ”ÇëôFHÄ8‹YÏOÕ´iSEQjjj<==ËÊÊÔh=‚ˆ³XÞ·%ëÏwœP1d RP1dËÒCC{?–XyyyBBÂÝþuêÔ©’f vç¹gq5kÖ¬%K–tîÜYïJAì§' >¼¤¤Ä27¾OŸ>}úô‘:ˆqVƒ ÜÝÝoݺ•™™ù·¿ýÍÛÛ»´´Tö¬àààíÛ·Æoo着*777=‚ˆ³Nž>~óæÍê&z € ‚bccÕê]®aq–5ý¾¸]‚¸f±ìù-Y²Ä²?ŸâôéÓ“'OÖ#ˆ8‹å}[²þ qÇI bH$P1dËÒ…}<ƒÃc¹A<ËFqE6kÓ“À©p¨YUUU^^®Ó·W×3«@",W¸(¨Reé»@k£¤¤¤¢¢ÂËË«Y³f ²XnʲQ½{÷.))¹ÇÿpéÒ%³Ô¸7ß|Óh4nÞ¼933³ªªª}ûöׯ_×0‚8HáååUZZêââb¹Ò§OŸ#GŽÈžra¹okÖ,!Dnnnii©~`²,–‡šp͹°<³JögÈ‚P1Àp­zÀØ&QQQÑÑÑ+V¬°Ü_ÌÎÎŽõÔS;wîÔ$ˆ8KÅø¾-Y†¸ã„ŠáÈACÎ,Í¡ ÀDYY™,–wí™Ã”YdwF¸ÞîÑ;‹wÏÀ b8r€ `›Ì˜1cÈ!cÇŽe–`_,ï: '˜ž„°DY,qzøÆYÆ.'T ‰ b8-l‚e“K—.‘mzF™²»uëV@@ÍÁKzd±ÜWI1|øð’’Ë”¡>}úôéÓGÛ»dŒXöü° Èú3è8iˆå T §…[Gr[¿~ý”)Sê¬Á¨©©iܸ±æ¯nÊ,Ù<|Nþí•Åò9ËF\P1d bœ¿ ¿ m”””¨û+4kÖLö,–Û„²l”ªeË–7oÞœ8qbLLLïÞ½Õ‹: J)³,XÞufæöíÛBˆ ° r,{~,ÅY߉²“Æ^ÅeÁoÂ/Ã&µµµË—/OLLÌÏÏW¯ÅÄÄÄÅŹ¸¸HšÅr›P–R™Íæ¿ÿýï ÿú׿}ôј˜˜ñãÇ =¥”Y,ßt™=zôÃ?üÆoX_ÌÌÌÌÈÈxóÍ7e B$%%EGGש®»wï...ž4i’¼Y,{~œ¾1¾oKÖŸ¡ì¤¡bH$P1dÈÒ :|6Y°`AZZZlllïÞ½Õ#FŽ=ºråÊñãǯX±BÒ,–Û„²lTçÏŸOLLܺu«Ùlž8q↠ô{uSf±|Óe¦eË–ÿú׿ºuë&„0™L_|ñŸqãrrrFŒqîÜ9ƒ„ƒÁd2¹»»[_LMM]»víÁƒåͲàÔóc‰ñ}[²þ e' Cº fW = Ãg??¿ƒvéÒÅúbVVVDD„Ñh”7 äuëÖ­´´´ÄÄÄèýê¦ÉÂØñ¹ººFooo!DAAA¯^½ KKKM&“ŒABƒÁžžÞ¨Q#ë‹—/_^´hQee¥¼Y,±|øÆø¾-Y†²ã„Š!T ‰²t‚ŸM<==óòòê,)1ÁÁÁUUUòfYpZÛLDœUG~~~PPƒ,6wOOjÕªÕÎ;{öì)„ÈÌÌŒˆˆ¨¬¬ÌËË{ä‘GŠŠŠd B †»M’4›Íòf±ìù±øÆ Y†²ã„Š!K@Å €m4jÔ(EQV¬Xa¹¿˜ëêêz[#ŽŸÅrm3ËFYs’½¼OOš4iÒõë×7nÜ(„X³fMBBÂk¯½–ŸŸ_RRò÷¿ÿ]Æ 
q—^‹Nìž%{ÏýÃ7fgV‘õg(;NvSféô*¦,M¨2fi`›\¿~}ܸqû÷ï÷ôôôöö.//¯ªª OMM 7‹åÚf–N0°·Fv¾±AŒ§'?>**êÒ¥K_|ñűcÇÞzë­Î;oÛ¶­cÇŽ2 èbê—ůçÇõá›u(§ËÈú3”'T Y‚*†´Y’ò›v4gÏž=qâ„ú@¬gÏž]»v•=‹åÚf–|ödgãpcM˜ÍæìììÀÀ@___AEEE~~~ºFØ%‹eÏëØÃ:”_w–¬ïD„Š!K@Å6KCR~ÓˆÙìS–k›Y6JðØ“9Ìûpcâ,–+ù™aÙócÙ¨:¡,»³¨Ž¯b‰²¬CYV 5°÷7 ·ÚÚÚeË–µnÝÚ××·]»v¾¾¾­[·~çwjkk¥ÎŠŠŠŠŽŽÎÉɱ\ÉÎΞ6mÚàÁƒåÍbÙ(!„Édò÷÷¯s1  ººZꬼ¼¼­[·^¼xñ‘Gyì±Ç¶lÙ¢ù6¿ÄAÖ¦L™¢weYu¢,ƒ3fÌØ¶m›æ_ÖîY,F²þ%e–Eii)§,T ‰²XBÅ4KK Ø 666888>>þСCgΜ9tèÐÚµk[·n½`Á©³ ÃÃÃ…žžžžžžBˆðððÂÂBy³X6JQ”‘#G>ýôÓgÏžµ\9sæÌSO=¥nþ!o–ÅO?ýëçççããóÒK/éWµÈ‚E¡¬½YdÕ‰² FFF&&&jþeíže4i‚ˆ³TÅÅŹ¹¹ÅÅÅ̲˜AÅ(‹ìUL_.T '†°M|}}­ªS§NùùùI¥ÊÎÎNIIùðÃSRR²³³uJ!Îâ×(®û:jjj’““Ÿxâ ½‡s4AÌÀdÕ‰¾ ‚ÌfóÒ¥K­c Z¶l™Ùl–:kÓ¦Mw~Ù]»v%''K…Šö…Š!W–0¶‰‡‡Ç÷rnܸááá!uH‡ßÀþnòòò•––ê÷Åé³Èª“]Ê ³GÓ§OÿüóÏõûúvÉâ:Ka2™ê\LIIéׯŸÔY¨e‘½Š)K*†\Yzrá²ãàzðŒ3† 2vìXm¿¬}³X6ʳØè³°KÊcyª'ׯ ôÜsϽøâ‹Ú~YûfqÝ å™UCª,²W1eiBÅ+Kv|3Àuö)Ë¥,,¥ðÉC–EÙ¨zÕÔÔx{{KÅr%?×G,q%„p¹ ©³P1$Êb C®,=à °ø áz0YËɳTÌNõäúˆÀ‚Ó<®³±xÖ‚Š!E–§ÙX¨reéÂŽƒop|Ì–²Ñdq݉å.)-Z´ðôôœ9sæ?ü`¹¨žº$uK\°œÇÁu6–¨o•N(³XBÅ%HAÅ-Kè =¸îÝ»ÇÄÄ,_¾œY–´‡Ä²Q ß7]–»¤üòË/Û¶m‹ŒŒ4 }úôÙ¼ysuuµNƒRÊ,–»¤p=`ŒñDM~{²<³Š¬?CÜqBÅ%ÈC–,=`üà.]º4mÚ´:0ËR˜öX6Jáû¦K–Åûpcš,–+ù¹>"`9ƒ=N—Èú3Ä'T Y‚œ§Š¡ €¡,{H,¥ð}Óe¹KJd‡gqÂïËy,ÏvRøN\b ÃñƒT Ù²ôÐPÜÁd2ùûû×¹P]]-oËF©_ö›o¾¡Ùäƒee£êpss›4iÒ¤I“,GYÈžÅi—U×®]›7oÎ)+***::úÎY,oÖ¥K—JKKµýšŽ—––Wg‹¾òòrÍ·è£Ìb Ãñƒ*†lYº°÷fºJ”ÅvGsü,¬ä—(‹å<®¸N\b C– ÆP1îÀP–=$–ÂNl1ÎÂJ~‰²Tü&jª˜ÝZb9_+T ¹‚T¨Rdé`¸+–=$fÂNl1ÎÂJ~‰²,8õü¸ÞZÂÄ%‰ bH„Š!W–0†{áÔC¢"Îbc*‰²Xî’Â5‹eÏë­%–—¸BÅ%HAÅ-KÛdÓ¦Mw¾2wíÚ•œœ,uËËF1†1•DYXÉ/QËž×[K*f—Âþ eÇ C– CÎ,mal!„Édªs1%%¥_¿~Rg±ì!±lcSI”…•üe±ìùq½µÄY†²ã„Š!K‚ŠŠbPEÀƒ2 ééé5²¾xùòåE‹UVVÊ›åççwðàAËNôª¬¬¬ˆˆ£Ñ(iËF1výúõqãÆíß¿ßÓÓÓÛÛ»¼¼¼ªª*<<<555 @Æ ÆY*²£¤(Ϭâ—åéé™——WçУÑ\UU%i–z[çÎT\]]·oß®aqÖŒ3† 2vìXm¿¬Ý³Èú3”'*†ã T Ù²ô€°M ƒ‹‹K½ÿd6›åÍbÙCbÙ(ö0¦’( ¤À²çÇõÖÒ Aƒž{î¹_|QÛ/k÷,²þ elj+²W1eiBÅ+KÛÄ`0˜L&wwwfY,{H,’*))Q¥uî’ȘգG?üámÛ¶‹‹ÓãëÓ1Î|{~·–äAÖŸ! 
BÅ(ÈÙal®`–=$–º›[·n”••IšÅrL%„¨­­]¾|ybbb~~¾z%(((&&&..în)?ëòåË‹/>pàÀùóç5ü²v bœežŸ\8Ý2À¨ÒñƬbè`›ùùùñËR±ì!ñkÔúõë§L™âååe}±¦¦¦qãÆš¿ºÉ²XŽ©„ ,HKK‹íÝ»·OYYÙÑ£GW®\9~üø+VÈ›`GŒ¾±¼e&û3ô'p|¨Òeé‚xÓ-ÐV‹-<==gΜùÃ?X.šL&=^Ý”Y,±Ü€$Ò½{÷˜˜˜åË—sʺtéÒ´iÓ:tè wq–‚à ÀÞÈ^Å”¥ Cº,= Ûj“éÓ§þùç̲XöX6JõË/¿lÛ¶-22Ò`0ôéÓgóæÍÕÕÕ: J)³XÂÑ `_Œ{~,q½eFÖŸ¡ì¤±Dö*F¹Ð׊¡‡ö}þ,»K—.•––2ËÚ¹sgMMMRR§,–R5lØp̘1{÷î=wîÜþð‡… Ο?_ö,–¢¢¢¢££srr,W²³³§M›6xð`©³@m۶ݲe Í„Ê,®L&“¿¿‹ÕÕÕRg‘õg(;i,‘½ŠQ.4Áµbèk€X¹uëVZZZbbâô~uSfñàT;±€pxÜ?TŒû‡°6°ß8šüüü   ~Y ðÛ‰íöíÛBˆ tŸRDàØß2#ëÏ ã΀}ÅÐÀ6Á~kà8pìeôèÑ?üðo¼a}1333##ãÍ7ß”1H‘””]§ºîÞ½»¸¸xÒ¤IòftøÝ2#ëÏPvœP1ÀA𫺰Ûêc°ßØÙl^ºt©õØ   eË–™Íf©³êUSSãíí-iãØZ´hqæÌõãêêê””EQΞ=Û©S'IƒEB˜L¦:SRRúõë'u€Ý‘õg(;N¨Á`›øùùzïÅÿøÇúf‰$òæ›àÓJ•õ3*'*†F¨eY%ÇpÍq¨ rNê[ï•9ÌáÆ™+--½téRòç>úhÞ¼y±Xìúõë.—KÓ D"aÆœihõ»ßýîÓ[6Nž<ÙÞÞ®o–!ñæ›à3«”õ3*'*†.A *†VYápF¤¾oMäÚ˜ÈA†át:oܸ1åYÜP(TYY‰DôÍŠÅbÇŽkkk;{öì—¾ô¥æææ—_~ÙápdýƬ² Á6nÜ822²ÿ~Ã0Þyç¶¶¶×_}pp0;vLÇ Ã0G47o^v­=³>¼wïÞ®®.M³¸ù¦eýŒÊÆ)çßb•Y}‹U–&*Æ,Ç8#Rß·&µC’7(Cî*Lʵk×ü~ÿþð‡X,öÍo~óÝwßµ¨j) ’çÚµkõõõÁ`pþüùÇ¿xñâŽ;–,YÒÑѱxñbƒ ´˜ÖeÉëüÇtï4ŠÅbY Rœ%•²~&ç“EDV Å¥‰Š1Ë1Îyï[“Ú!É”!wf egs¸ñÉÅb@ ´´ÔårÉ-..¶4"'Y";?©sÙ”õNj‚¨ºT 0Îa'ÐHíä *EÞ*Ìt”9ÌáÆS=íOdç'rP³Ãþøk”…ô=’ë¿€Þâñø®]»***\.×¢E‹\.WEEÅîÝ»ãñ¸ÖY†aLLLÄîGë,‘ƒJòx<õÿaõŒTe–aáp8 †ÃáäZ7)U$²ê¤² nݺµ££#ë¿6çY"…B!eý¥Ê,©¨e‰DÅÀÜ\ÿôæõz;;;½^ï”SaLÓÌú©0*³ ‘x<¾gÏ¿ß?88˜¼R^^ÞÜÜìõz§» MVN‚¦sûöm·Û=>>®o–²ßöZ0IDATꤲ ƒÁ±±±ìþN;d‰ìüRÛAÜèS™%C£,eßb•¥‰ŠÎùȈÔhB¡PÖgγD*‘H´´´TVVîÛ·ïÂ… —/_¾páÂÞ½{+**¶oßN–­‚‰Ä¾}ûLÓœrÑ¢#—Tf)«N*Ë 4‹ÅvîÜyï®òòò]»v}úT½²DžY• b רzeY pF89'uFd‡¤òÌa•Y"OõL¹yóæõë×?«iÖ·¿ýí?ÿùÏÖýþœd‰\›K=­4AÅÐ*KÙ·Xei¢bè•e&ÀùÆ7¾ñõ¯ýÞîùòåË/½ôRò´}³DvH"•» #²CºsçNGGÇ /¼àp8¾üå/_êÿa hiiÉËË;zô¨¾Y²k¡bh‘•"éÌ**†^Y–Èáä[ŒO>ùäÃ?–•÷(‹â •Y@àСC¿ýío:ȲmÐ}ݸqCd–Ö¤Þ"¹QSên,ã~OÙYDeV’²~FMC— C·,+0ÎÈ¿þõ¯ 68‡ÃaFCCÃÄÄ„€,‘’ÈAͬŒØ?Kä“ü"7ü'DoÔ”·6'õðeýŒÊÆ‰Š¡KP C—,+0ÎÈöíÛ¿öµ¯ŒŒTUU ¼øâ‹?üád‰ìDª©©i†O7nܨiV‚•­²D>É/õÔ·¿È&lÉLY?£²q¢bè4«V`œ‘Ï}îsŸ|òI"‘¨ªªJ$CCC .%²C9¨§žzê£é=ñÄšf%XÑ*K0y·DnÔ”ú’©KfÊú•SÃþA *†nYV`œ‘üüüäÉÚzûöí¢¢"Y";$‘ƒZ¸p¡sFšf%XÑ*+…ýêöϹQSêKþ¥.™)ëgT6N)T ›%¨ºeY 
pF>ûÙφÃáD"QUU‹Å~úÓŸ~õ«_%²C9(ÁXÑ(‹ýêeIݨ)’Ô%3eýŒÊÆ‰Š¡K`R+†˜gä•W^InlxüñÇ?ó™Ï<ûì³×¯_%²C9(ÁXÑ(‹ýêe%ÉÛ¨™$ìæ›Ô%3eýŒÊÆ‰Š¡WPC‹,+0ÎH,»sçN"‘8uêÔÅ‹-Ýø®2+Id‡$rP"±2¢QûÕ5ÊJ‘ÔùI½ù&uÉLY?£²q¢bhDÅÐ+Ë L€1I’ú ÅY"±2¢EûÕ5ÊÙùI½ù&uÉL$*†.A *†nYV`œápø?ø˜,‘’ÈAASÂVaد®Q–ÈÎOöÍ7yKf)Êz'AT ]‚T =³²‹ pvܸqÃår‰ÉÙ!‰”TÊΖz¸±â,ö«k”%²ó“zóMŸ/‹Aг’úûû{{{oݺµ`Á‚êêjÇcEŠÊ ‘YN§óÆEEE÷^ …B•••‘HDÓ¬ä/ŸÏ—ªº@ ¥¥%//ïèÑ£Y RœµuëÖ5kÖ444d÷׿<+EYï¤,ˆŠaÿ ƒŠ¡[–%r:ýÖÞÐüýï/**Jþ<::ª{–ȵ1‘ƒ’JÙ™ÃR7VœˆÜ¨)õæ›ÔÓJ•õ3*'©D>áBÅÐ+Ë L€32ç†a$¨®®Ö=Kd‡$rPЋÈU‘ûÕ¥f%äv~ ž|Ó‡²~FYC£ *ÆlÆèì¶zdd¤±±ñܹsN§³  À4ÍH$R[[{øða·Û­i–ÈAA/"·]‰Ü¯.5+EÞFMÙÂápòà” ¢ºg‰ÙMÅÐ.H6©#‹˜g‡° p’ÈIä   ‘«0•••3—£‰‰ ½‚g‰´eË–ƒN÷é¦M›f˜-Ø9Ë0Œx<¾gÏ¿ß?88˜¼R^^ÞÜÜìõz“·45ÍJ3¦bh„Š¡]–ææú/ûr:eee«V­*++“%rPÐ…ÛíþàƒÔ¬Œ(Ë ƒYÿ¹ œ%²ó;þ|ww÷tŸž9s&+)ê³ Ãðz½^¯wÊ«ìLÓÌú«ìTfÉCÅÐ%È bh˜eîgÇ­[·~ó›ßüä'?‘‘511±yóæ#Gކ‘H$Þ{ï½äóúf‰¤OäFMÁ7ßÄ^ ¬wRÙ¤I"ò *†vYV`ŒûøÑ~~ÿûß?÷ÜsûÛß¾ûÝïVUUýêW¿Ò:Kä  ‘7ß ÁŸH"Ϭ‚FD>á"#}L€3r÷î]Ã0yäaYO=õÔ|PQQáñxúûû‡‡‡¿ò•¯X´ëFY–ÈAA#"o¾°ŽÈ׿ û•sR+†%rðæiAÖ­[·sçÎ)?ú裷ß~[ë¬üüüäUUU‰DâöíÛEEEYOQœ%rPЈÊ3‡Ÿo À RϬRÖϨlœ€œ“Z1¬ÀàŒ<ùä“gÏž}úé§ ÃˆF£Çoll¼råÊÚµk?þøc}³.\xéÒ¥Ç{ÌãñüóŸÿܱcÇ¥K—þú׿f7Eq–ÈAd“wx²~FeãØ„¼Ša&ÀÉËË …B†a ­X±bxxxll¬´´4ê›õ­o}kíÚµ n·;.Z´è/ùKeeevSg‰s555ápx†?­ÍÿÊ‚XJY?£²q ŽAÊHIII0¬®®6 c``À4Íx}õÕWµ ì@ðkó”õ3*' ·W +0ÎÈóÏ?¿mÛ¶ýû÷†ÑÞÞ~çηÞzkpppÕªUZg¥fkkÖ¬Éú/ÏU–ÈA9÷ì³Ï¾øâ‹Ó}ºråJí‚;|Z©²~Feãä–àŠa¶@gäÚµkõõõÁ`pþüùÇ¿xñâŽ;–,YÒÑѱxñb}³RÆÆÆ~ö³Ÿýú׿¶è÷ç$Kä ’>³JY?““Æ È Áà L€3‹Å@ii©Ëå’”•488¸|ùò™ÿ•k—%rP€´··oÚ´IR+(ëgÔ7Nì³Ñ25wîÜeË–9Ž`08ó ZôÊ€õÊ+¯ `eý €Ocœ‘x<¾k×®ŠŠ —˵hÑ"—ËUQQ±{÷îx<®uÖð„B¡D"‘üùæÍ›YR™%rPèEY?£²q ^‚•¯×ÛÙÙéõzkjj ÇÇÇ{zzZ[[MÓôù|úf•——§~ŽÇãÉÿü¾péÒ¥ì©Ì9(ô¢¬ŸQÙ8ÐÏg¤¸¸¸«««ªªêÞ‹}}}uuu¡PH߬‘ËŠ`ããã………’‚d²~&'ûc tF¢ÑhIIÉ”‹n·{rrRë,HßÝ»wïÞ½k†Õ“ReA¬£¬Ÿ¡qp_lÎH}}}SS“ÏçK­/–––Õ«WkéÛ°aÃÊ•+ßxã{/vww¿ÿþûo¾ù¦ŽA@ÎÕÔÔÌüÒ¦`0¨c–¡°Ÿ¡qÂì!¸bX pFü~cc£Çãq:¦iF"‘ÚÚÚƒj•²`Á‚mÛ¶Y÷ûs’%rP@uwwÿâ¿HþF?ÞØØøØcýéOÊî¼TYs¦i¶µµM÷髯¾ªi–¡°ŸÉIãä„àŠaž΂þþþÞÞÞ[·n-X° ººÚãñÈÈ€täåå…B¡‚‚Ã0†††V¬X1<<<66VZZFu rnýúõGîÓ—^zéĉ:f¥(ëghœ0ˆ¯ÙÅ8;Âáp²¶ ÈJ>e÷È#*W–%rP€”••8q¢ººÚ0Œîîº‰‰‰7n|ñ‹_Ìîà”P@Y爐I`4è‘zð† Rû Sº»»wîÜ©o–ÈAvðüóÏoÛ¶íêÕ«W¯^moo¿sçÎ[o½õöÛo¯ZµJÓ ÀVÚÛÛ%eq0`)aà d ¥¥¥²²rß¾}.\¸|ùò… 
öîÝ[QQ±}ûv­³žxâ‰Ë—/'žœœ=È“Š˜mb±X (--u¹\2‚ûp8Ôuk ²8°”°Ša¶@gDê9À%%%©7˜ ˜¦ÇMÓt:úf‰`sçÎ]¶l™Ãáƒ3ŸŽ K‹p0€Übœ‘äsW®\I] ›7o¶î`5Y*´ùô O*bVá‰>ÀRccc’²”õ3*'À>„U Kät¶ö†‡‡kkk Ãp:¥¥¥Éû{µµµÃÃÃZg©|ÐNäÓƒ<©ˆY…'ú¤OY?£²q -÷mÛÈs€U>h'òéAžTÄìÁ}€8ÐÔÔ4gΜ{/ž:uêæÍ›7nÔ7+‰s€ì’]1²‹ 0f"ì|cÅAг€\q:7nܘò< UVVF"ƒ;p8ÑhtÞ¼y÷^<|øðÞ½{»ººôÍ`*FúxøámÙ²e†O7mÚ¤i–!ô|c‘ƒì€'ú‹œŸOÓ,‘ƒìÀï÷766z<§ÓYPP`šf$©­­=xð ¦A€M444ËRÖÏ(nœ;W1,Âè‡WYY9:::ؘ˜Ð1Ëz¾±ÈAöÁ}@vÝw“¡îYÊúÅs"+†E¸üðR½ Ë2„žo,rP€}x<5sQeA²NY?£¸q žÆ}ˆ<ßXä ;غukGG‡¤ ÀB¡²,*³XŠ‘>&À¸¿ßoš¦ÇãÉÏÏ/++ËÏÏ_ºti$ikkÓ7Kä ;ƒccc’‚;(..Nþ‡ƒÁ`8–‘À TŒôñ 0¦%ò|c‘ƒÈÇ÷ìÙã÷û“WÊËË›››½^ï”ã7õÊ`*FúxÓòx*Fúx÷!òÈ\‘ƒì@ä·ȹuëÖ% ŸÏ—ú7ZZZòòòŽ=ªo+P1ÒÇ[ q"×D °‘ßb ç8¼@ú¨éã0îCä’ÈAv ò[ ؇H#L€q###çÎs:¦iF"‘ÚÚÚÇ»ÝnM³D °‘ßb `LKä’ÈAv ò[ äÐÖ­[׬YÓÐÐ , €¨éã-И–ÇãQÖY*Ë9(ÀD~‹ ƒcccò²XŠ‘>^‚…©¶lÙ2ç›6mÒ1Kä ;xæ™g¾÷½ïýò—¿ØÄéÓ§_{í5yY¬@ÅH[ 1ÕâÅ‹ÛÛÛ§ûtýúõCCCÚe‰`?ÿùÏ?üðÃk×®Éì&'÷üIÊ`*ÆÿÄSUVVŽŽŽÎð&&&´Ë9(€`ñx|Ïž=~¿pp0y¥¼¼¼¹¹ÙëõΙ3Gß,V b¤g€1U0”—%rPÁ¼^ogg§×ë­©©),,ïééimm5MÓçóé›À TŒôqÀvŠ‹‹»ººR§^'õõõÕÕÕ…B!}³XŠ‘>^‚`;Ñh´¤¤dÊE·Û=99©u+P1ÒÇÀvêë뛚š®\¹’º6oÞ¼zõj­³XŠ‘>&À€ŒÜ½{÷îÝ»’‚;ðûý¦iz<žüüü²²²üüü¥K—F"‘¶¶6­³XŠ‘>žddýúõ+W®|ã7î½ØÝÝýþûï¿ùæ›:öÑßßßÛÛ›ŸOÇ [ )..îêꪪªº÷b___]]](Ò1°ƒ455Í™3çÞ‹§NºyóæÆõÍ`*FúØ ÈH4-))™rÑívONNjØÁw¾ó;wîL¹hšæ»ï¾«u+P1ÒÇh@Fêë뛚š|>_êÞl hiiY½zµ¦A€Mœ|ØívëØÃᘲÃ0%‹é›À TŒô1dAoooòt¢êêjÇ£{[‡#Λ7OX+P1ÒÇÀvhg¤Š‘>^‚ÈÈÖ­[;::$v …”õ—*³XŠ‘>^‚ÈH0“ØAqqqò‡p8œÜó_TT$ €¨éã0 #§OŸ~íµ×$vÇwíÚUQQár¹-Zär¹***vïÞǵÎ`*Fú¸ Èe+Áš.9Äëõvvvz½ÞšššÂÂÂñññžžžÖÖVÓ4}>Ÿ¾Y¬@ÅH/Ád$ïÙ³Çï÷&¯”——777{½ÞéŽI°y`ÅÅÅ]]]©S¯“úúúêêêB¡¾Y¬@ÅHw€Q¶¬û’3ð@¢ÑhIIÉ”‹n·{rrRë,V b¤;À€Œ([ Ö}Éx ëÖ­K$>Ÿ/õo>´´´äåå=zTß,V b¤—`2¢l%X÷%gàøý~Ó4=O~~~YYY~~þÒ¥K#‘H[[›ÖY¬@ÅHw€Q¶¬û’3ðúûû{{{“o}«®®öx<2²XŠ‘&À€ŒŒŒŒ466ž;wÎét˜¦‰Djkk>ìv»u R1d²•`}—œ²uëÖ5kÖ444Ë`*Fúx 4 <𹍲 ·‚ÁàØØ˜¼,V b¤;À€‡·eË–ƒN÷é¦M›ÚÛÛõ ‚qððΟ?ßÝÝ=ݧgΜÑ.°›p8œÜó_TT$) €¨ÿw€¯²²rttt†?011¡W`ñx|Ïž=~¿pp0y¥¼¼¼¹¹ÙëõΙ3Gß,V b¤;À€‡ …6áõz;;;½^oMMMaaáøøxOOOkk«iš>ŸOß,V b¤;À¶S\\ÜÕÕ•:õ:©¯¯¯®®. 
é›À TŒô=’뿦ŠF£%%%S.ºÝîÉÉI­³XŠ‘>&À¶S__ßÔÔtåʕԕ@ °yóæÕ«WkÀ TŒô1°¿ßoš¦ÇãÉÏÏ/++ËÏÏ_ºti$ikkÓ: €¨éã`›êïïïííM4R]]íñxdd°#L€³[ ìeË–-3|ºiÓ&M³XŠñ@8À^Ο?ßÝÝ=ݧgΜÑ4 €¨„-ÐöRYY9:::ؘ˜Ð1 €¨„ 0`Và`À¬À0+0Ì L€³`À¬À0+0Ì L€³`À¬À0+0Ì L€³`À¬À0+0Ì ÿ´„Ø6ìX…sIEND®B`‚sleef-3.3.1/doc/html/nontrigsp.png000066400000000000000000001300411333715643700170710ustar00rootroot00000000000000‰PNG  IHDR óúX¬bKGDÿÿÿ ½§“ IDATxœìÝ}\Tuþÿÿ÷ ˆÈ…ˆ\ˆ¨!ˆ![¬)Z’Zn€e¥æ -•¢fmßUÓÌ]Ùh×’Œ”43dMW+SË«2ÓÕÄ Ó! QÁ1 ®æ÷Çù5ŸY.fƘ„ó¸ßösÞç}Þï×9ãîmŸœ+…V«tuwttX `€,€²@È `€,€²@È `€,€²@È `€,€²@Ȃ͒%K:ºĈ#„¿ûÝלּoG±PÍ €\XÕ¹sçbcc=<<”JåÀçÎ+µ§¦¦FDDtlmuÏ=÷deeéVÍu¼¶™Î5,m±íè2¢Õj'L˜0zôèÜÜ\77·‚‚‚½{÷J›¦L™Ò±µY™…Ž· [__ß­[7K”®¬çêÕ«EEE)))ÞÞÞöööÁÁÁÏ?ÿ¼´iĈ™™™Òò°aÃ^zé¥èèè¡C‡~õÕWRûåË—zè!—ÀÀÀõë×+Šëׯ7›âæÍ›Ï=÷œ¿¿¿››Ûĉ/^¼Ø²ŒVû”––zzz~ðÁRŸY³f3¦±±ÑÀ˜UUUÏ=÷\ÿþý{ôèrìØ1!„Ï_|!u¸té’B¡P«ÕÏ?ÿ|^^Þ¢E‹8uêÔfÇ{õêÕ'žx¢OŸ>^^^³fͪ¨¨0|t ;lذ+VŒ=Z©T†††þðÃï¿ÿ¾¿¿Ïž=“““µZ­égÌ”_GŸ‰SëÛêÉ6lØ¢E‹F´cÇŽ¶N”¾üãýû÷wqqñôôœ?¾ácluR@FXOïÞ½‡ ’’’²uëÖóçÏè¹cÇŽ?ü0///55uúôéRã£>Ú§OŸÒÒÒ#GŽè’j3O=õÔ… Ž;VZZ:tèи¸¸¦¦&SúxyymÞ¼999ùìÙ³YYYŸþù–-[lll Œ9cÆŒ¼¼¼ƒVUU}üñÇmNzzzHHȪU«ÎŸ?Ÿ““Ólë£>ZSSsîܹ¼¼¼+W®Ì˜1Ãðy0qX!Äúõëßx㊊ŠÐÐÐGyäСC¹¹¹'NœÈÎÎÞ¹s§ég¬ÃU™>µ¾¶Næ¶mÛ¶lÙröìÙ©S§8Q’üüüE‹íÞ½[­V<öØc†Ñô_ÐEh°¢òòòW^yåž{î±³³óññY³fÔ>|øðwß}WZ }ã7¤eé*ß•+W~üñG!Dyy¹Ô~ðàA!ĵk×ô÷-))BK}œœœNŸ>­_€á>ùË_ Ô£G/¿üÒpé*âùóç› ··÷¾}ûô÷½qã†V«½ûî»7lؠ릫ùÂ… Bˆ‹/JíyyyBˆŸ~ú©­óÐlº¶†•v_µj•´,]”–*ÑjµÓ¦MûóŸÿlâk9¬ÑªL™ZضNfhhèÊ•+¥e'J§  ÀÎÎnóæÍjµZ×x«¿   ã`€U¹ºº.[¶lÙ²eµµµ›7oNHH˜0aB³nîîîÒ‚R©Bh4šË—/;99¹ººJíýúõk9xaa¡B¡ˆŽŽÖµ888”––Þu×]&öIJJZ¹råˆ#î¿ÿ~ÃýÕjµ­­í€Úq2„¢¤¤ÄÖÖÖÏÏOZ•,))ñòòjõxß¾}¥GGGGGGݪF£1ptúg¬%Sª2:µ¾¢¢¢¶N¦···´`øDIú÷ïÿÑG­Y³&!!aèСþóŸ~øaKÿ‚€N„ èööösçÎ]±bÅ©S§Zà–¼½½oÞ¼YQQ!eàVU•RñW_}eà^V}šššfΜ9qâÄ£G®_¿~Μ9ú744è·÷èÑ£¶¶VZÖDùŽ;ZìÈ××·¡¡áÒ¥K>>>Bˆ‚‚©±­ú›ikX™rƬÀß߿Փ)„P(Ò‚‰'*66666ö—_~ÉÊÊš4iÒõë×oõta< °žk׮͟?ÿäÉ“¦²²2##ãâÅ‹&~gàÀ÷Þ{ojjjMMÍõë×—.]Ú²ŸŸ_LLL||¼îÞã­[·644˜Øgùòå—.]Ú¸qã–-[¤÷Kèïçç7qâÄÄÄÄ’’­V›ŸŸ/E²»ï¾{×®]Bˆ†††´´4ݼ^^^*•ªeÍ£Fš7o^eeeEEEjjê„ ô¯jÖÖ°&2åŒYA['SŸ)'*??ß¾}µµµvvv 
…ÂÆÆæVÁœœœuëÖYíØÖDXR©üù矟|òIOOO??¿õë×oÚ´iôèÑ&îž]ZZêåå)½ßÈÎήYŸ7Þyç£GîÑ£GXXاŸ~ª»„h¸ÏþýûW¯^““ãèèµpá©S§þüóÏÆüàƒî¼óÎÈÈHggçÉ“'K×{_{íµüüü!C†Œ9òž{îÑMºpáÂ;vôêÕKÿ^\ÝqÙØØÜyçÁÁÁîîî7n4ý”ÖD¦œ1+hõd6côDÕÖÖ¾òÊ+}úôquu]¾|ù¶mÛÄ-þ‚{öìÙºu«åÐÚ_?E@'²ÿþ)S¦´ú!€VqÐiœ>>‹-’²ëÙ³gƒƒƒÏ;زç'Ÿ|ÛÐÐÐê8ÿüç?ÓÓÓ/\¸`Ùr\Ç\.//¿|ùrxx¸´äààpúôéß0Ô‘#GÂÂÂÌZ  ²5ÞŤ§v{öì©kqqqù ò¾ýöÛû÷ï?~ü¸>vvvuuu¿¡HÀmÂ,7/wLvvvBTVVêZÔjµÔhº7ß|ó¯ýëüýý t«««³òmÞ …µo,gÆ®1c‡LÊŒÌÈŒÌhÍ;dRfdFf”óŒ2)3ZhF³ŒÓ1·@»¹¹y{{Ÿ[£çþûï7½}×®]-ŒŽŽÞµk×O?ýtâÄ —)S¦´ìsõêÕôôô‚‚‚íÛ·ïß¿ÿOúSûŽ»¹Ï?ÿ|Ô¨Qýû÷?|øðõë×·oß®Õj÷îÝkâîõõõ¦t›2eŠ.—••éNËÓO?=jÔ¨VÓï7ß|3a„ɓ'«TªÜÜÜÇÜðî+W®ÌÎÎ>pà@QQQ]]Ý´iÓ¤þmµ·ÎÊéWñÓOÖžQAdåêÕ«EEE)))ÞÞÞöööÁÁÁÏ?ÿ¼á]lllìõÜqǦ·wëÖ­å€#FŒ ìÕ«—¯¯¯¯¯¯J¥jÙgýúõ<ð@Ÿ>}ÂÃÓ““¿þúëVå‰'žèÓ§——׬Y³***¤öaƽôÒKÑÑÑ!!!C‡ýꫯší¨ÕjŸ~úéY³f¥§§;::<øõ×_—Ò¦a-Z4zôè   ;vܼyó¹çžó÷÷wss›8qâÅ‹[=RÝ-ÐvvvÒ9ijjÚ²eKbbëï“|ùå—gÏžý /øùùùùù=øàƒ†wÏÈÈX°`AHHˆ‡‡ÇêÕ«<(϶ÚeŽ ÈHïÞ½‡ ’’’²uëÖóçÏwTÙÙÙ}úôqrrzíµ×–.]j¸ó‘#GÂÂÂZ¶?úè£555çÎËËË»råÊŒ3t›vìØñá‡æå奦¦NŸ>½ÙŽgÏž-**š5kV³v…BaxØmÛ¶mÙ²åìÙ³S§N}ê©§.\¸pìØ±ÒÒÒ¡C‡ÆÅÅ555™rì}ô‘­­m«×½ëëë¿þúk{{û»îº«gÏž-“¿þîååå—/_—6988œ>}º­vSÊëÚÀ€Œ(ŠÃ‡3æoû[HHˆ¯¯ï[ÆÅ|çwÜõ讈šÒ¾{÷îVÇ|ä‘GN:uàÀ„„„VíÎÛo¿½ÿþÕ«W7k/((8tèЛo¾éâââææ–––öÙgŸ•––J[<<<¤‰Š‹‹ËÊÊô÷½víšÂÛÛ»åt†‡÷òòB\ºtéã?^»v­§§g÷îÝ—/_þã?æåå:¿ÊÈȘ={v÷îÝ[nª¬¬lhhøè£Þÿý+W®Lœ8ñ¡‡º|ùr[»WUU !zöì©ÛêââRUUÕV»)åum`@^\]]—-[öŸÿüG­V/Y²$%%¥­˜*yòÉ'sõ¸¸¸˜Þ>f̘>ø@ñ+µZ-õQ*•^^^#FŒxá…yä‘òòòV§~óÍ7W¬XqàÀÿf›JJJlmmýüü¤ÕHÒªî]VJ¥R¡Ñhô÷•²q³`iʰºÌ\XX¨P(¢££>>,,L©T.^¼ØÎÎîË/¿lkwéÅ`ú/ÇR«ÕÎÎÎmµ-¯Ë#2eoo?wîÜ~ýú:uÊ@7'''=ºg}Mi···Ÿ>}ºöWº¬£Õj«««[ŽË–-KKK;xðà!CZnõõõmhh¸té’´ZPP 5šràAAAþþþ7nlYŒáa¥{¤…ýúõB|õÕWç~UVV6nÜ8£S¯]»örµ¢ÙÉQ*•ºÎÒtºI[îîæææíí}òäIiU¥RUWW‡††¶ÕnÊÉéÚÀ€Œ\»vmþüù'OžÔh4•••/^Œˆˆ¶666ÖêÑjµ-[í¬k7¬©©iÅŠgÏž­¬¬ÌÍÍMLLô÷÷>^º8|ãÆ­[·644ž·ªªjË–-IIIú<óÌ3™™™ßÿ}]]ݪU«êëëÇŽk`÷¤¤¤´´4•JUQQ±pᨨ()B·ÕÞº~ý Wn~ýû[{F!ßdE©TþüóÏO>ùdqqq·nÝ7mÚ4zôhiëš5kÖ¬Y£ëüý÷ß·lܰaƒôú¨¶Ú:qâÄš5kÊËË=<ø÷¿ÿÝôó0~üøÃ‡/_¾ü¾ûî«®®îׯ_ll¬ôÊe‡Ý¸qã²eËF}õêU77·¨¨¨É“'žtãÆÎÎÎ111ú<û쳕••ãÆÓh4wÝu×îÝ»uñ»ÕÝ/^¬V«###kjj¢££³³³ ··î?þÜ…)¤?êta E×?F°>EëßnøÚw,_sÅ:nÈ `€,€²@~»={öŒ9RZ1bDfffÇÖÓÒíY•ébcc7oÞÜÑUüvýüw1`tç΋õððP*•œ;w®"&&fæÌ™ÍzŽ3æÙgŸBŒ?^¡P¬_¿^·éÌ™3 
…ÂÉÉIZ?~üóÏ?o`R­Vû /,]ºTZMMMÕ}P÷öa®ª6oÞéää$}²Hç§Ÿ~š:uª»»»³³sTTÔ‰'ZîÛØØ¸`ÁGGǸ¸¸²²2ÓÇ_ºté¢E‹êêêÚÍ?~<&&ÆÝÝ]©T-^¼øúõëfŸEÿü»¸¸(þ×¥K—Zîrøðáûî»O©TºººÎž=Ûð¾m[SιNÀËB‘hÕÿ zÅœ'Ùt`tzZ­v„ ½zõÊÍͽqãÆÎ;‡*„HLLü׿þ¥V«u=üñÇC‡%&þÿs z÷Ýwu[ß}÷Ýàà`ÓçÝ»wo]]ݘ1c¤Õ)S¦ÜÒîæU__ßj»¹ªrssKIIYµjU³ö¤¤¤²²²ÿþ÷¿¥¥¥aaa?üpccc³>+W®ÌÎÎ>pà@QQQ]]Ý´iÓL?44ÔÓÓó_ÿúWûAßçŸ>jÔ¨þýû>|øúõëÛ·o×jµ{÷î5q÷¶ÎvKú翬¬¬æWO?ýô¨Q£|||šõÿæ›o&L˜0yòd•J•››ûøãÞ·­skÊ9×)4ð7âüUkÏ(! Ó»zõjQQQJJŠ···½½}pp°tåvüøñ›6mÒõ\·nÝðáÃ¥x,„øÃþPXX˜——'„øå—_>øàƒøøxÓçݱcÇØ±c …´ª³kUUÕsÏ=׿ÿ=z„„„;v¬­F·ÞzkذaºÕÂÂB›¢¢"!ÄÍ›7Ÿ{î977·‰'^¼xQê3lذE‹=:((hÇŽÿøÇ?ú÷ïïâââéé9þü–U]½zõ‰'žèÓ§——׬Y³***tã¼ôÒKÑÑÑ!!!C‡ýꫯZìøñã{ì±~ýú5k?þüc=Ö·o_GGǤ¤¤«W¯–––6ë“‘‘±`Á‚Õ«Wø á}Û:·¦œs" ÓëÝ»÷!CRRR¶nÝzþüy]ûwܯ»Æ[WW÷þûïë§Žnݺ͚5Kê°mÛ¶¡C‡4Èôy¿ûî»¶.®Î˜1#//ïàÁƒUUUü±‡‡G[:O<ñĹsçrss¥Õ¬¬¬Ñ£Gûûû !žzê© .;v¬´´tèСqqqMMMR·mÛ¶mÙ²åìÙ³¡¡¡‹-Ú½{·Z­.((xì±ÇZVõè£ÖÔÔœ;w.//ïÊ•+3fÌÐmÚ±cLJ~˜———šš:}útÓO‚ ¶mÛVZZúóÏ?gddDDDx{{ëw(//¿|ùrxx¸´äààpúôiÓ§:tèwß}gÆÃ9{ölQQѬY³šµKË00¬îlO:ÕÀbÔG}dkk;eÊ”fíõõõ_ýµ½½ý]wÝÕ³gψˆˆ¯¿þÚÀ¾mÛöŸó®Š €NO¡P>|x̘1ûÛßBBB|}}ßzë-iÓܹsÏž=+]kݾ}{}}}³døÇ?þqóæÍµµµëÖ­KHH¸¥yoܸáììܲ½¸¸xçÎëÖ­ëׯŸB¡¸óÎ;ZmÔß«W¯^1116lBhµÚ÷ßΜ9BˆK—.}üñÇk×®õôôìÞ½ûòåËüñG骵">>ÞËËKÑ­[7­VûÝwßUVV:::þþ÷¿oVUAAÁ¡C‡Þ|óM77·´´´Ï>ûLwµ6!!A ä<òHqq±áGFõ9²©©©oß¾NNN;vìx÷Ýwu—Ä%UUUBˆž={êZ\\\¤F9;;ë.Úåp®]»&„hÔMVw¶ ÿ(FeddÌž=»{÷îÍÚ+++>úè£÷ßÿÊ•+'N|衇._¾ÜÖ¾mÛöŸó®Š €®ÀÕÕuÙ²eÿùÏÔjõ’%KRRRvïÞ-„èÛ·ïĉ×­['„X·nÝôéÓ•J¥þŽ 6lØk¯½væÌ™I“&ÝÒ¤½zõj5TÙÚÚ0Àhc3³gÏþðÃëëë÷ï߯V«¥z  EttôàÁƒK^^^XXX³ÆöNPP¿¿ÿÆ›µkµZÃÃêÎv[?ÊôéÓµ¿rqqiuöµk×>ðÀ­þ)D©TêV¥éôâfû¶unÛλ*0:½k׮͟?ÿäÉ“¦²²2##ãâÅ‹ºoÏŒ7ÎÝÝ}òäÉ÷Þ{oHHHËÝãââöîÝ»xñâ–›kõhµZý­±±±úWçtüüü&Nœ˜˜˜XRR¢Õjóóó Zml¶£ÍÌ™3ÿñ|üñÇÒýÏÒh111ñññÒuÈ7nlݺµ¡¡¡Ù¾ùùùûöí«­­µ³³óððP(666úF5oÞ¼ÊÊÊŠŠŠÔÔÔ &H7ôšB:ÒÛ¥³!„pww \»v­Z­®««{ûí·ëë률•““#]xB$%%¥¥¥©TªŠŠŠ… FEEI1O¿O«ãKöíÛÛ¬žöŽB¡X»víúõëçÏŸöìÙêêêsçνøâ‹[¶l1qX”–ªªª¶lÙ’””ÔV‡gžy&33óû￯««[µjU}}ýرc ìÛÖ¹m«½U¡ÍßEmqaíúÃK;h»:9#XŸH0þ°F“˜˜¨T*ÃÃÃ7oÞ¬ßaùòåBˆ¬¬,ýÆqãÆ½üòË͆ڵk—£££®C³ÿóüý÷ßëwnjj·ÕØL~~¾bÈ!úUUU©©©NNNýúõ›9sfCCƒV« Ý´i“ÔçôéÓÇïÙ³g¯^½ÂÂÂ>ùä“–U•––N›6­wïÞžžž3f̸víšÔ®?NMMâÇlV˜þ÷¢$ÒîçΛ8q¢«««““Óï~÷»;vHýçÎûÀHË 
óçÏwsssppˆ‰‰¹råJË>múôé¾}ûþòË/-ÏU{G«Õ;vìøƒ«««½½}``à¢E‹¤LÖÀÒŒþù×jµÿüç?½¼¼êëë[ö”455½úê«^^^NNNß|óá}Û:·mµwRæŠu íÿþ«ëQ(ºþ1€õ)Z¹…°9í;–¯Cå‚Í IDAT:ÚçŸþÚk¯>|¸£ é²âââ&Ož|K/¦F×c®X×õÃ!, ¬Æ\±Žg€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@ÈÂm€,XàáááèèWVVÖ²ÏæÍ›###œœlmm­_! 3ºíðÊ•+³³³8PTTTWW7mÚ´–}ÜÜÜRRRV­ZeýòÔmw5##cÑ¢E!!!BˆÕ«W«TªÀÀ@ý>ãÇB|òÉ'S" º½®———_¾|9<<\Z rpp8}útÇVèn¯\UU%„èÙ³§®ÅÅÅEjlÅÿZ²d‰Ô~òäI]Ÿüü|F£[Õߤ¿¬Ñhòóó[ÝÄŒÀŒ «Lt›#0#0#0#܆#$&&6 qÂLZ­Ö\cµ_yy¹»»ûÑ£GGŒ!µ8::nذáÑGmÙù“O>‰mhh0<¦Bq{#t ŠDã}´ïX¾ æŠu·×`777oooÝ_T*UuuuhhhÇVèn¯,„HJJJKKS©T .ŒŠŠ’Þ€•““³nÝ:©Occcmmm}}½¢¶¶¶¶¶¶#+t·]^¼xñ¤I“"##}}} Evv¶Ô¾gÏž­[·JË6lP*•“&MjllT*•J¥òúõëW2 èúÏÇò 0XÏ«éšÏ`!`€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²@È `€,€²pkxݺucÇŽµP)Xέà^½zùûû[¦,H¡Õj;ºËR(ºþ1€õ)÷Ѿcù:€ ˜+Ö¹|ìØ±‚‚ÝjAAÁñãÇÛ?+Vf$Ï;·¡¡A·ÚÐÐoá’0?#בœœ4B¡VµZ­³³³F£±JmæÁ-Ð` Ü ¬ÆJ·@»»»—””èV‹‹‹]]]Û?+Vf$ÇÄÄÌž=ûÂ… Z­¶  `Μ9111Ö© 32€_}õU''§vëÖmÀ€=zôX±b…u*ÀŒLº‘º¸¸¸¨¨ÈßßßÏÏÏ 5™Ï€%ð 0°+=,„hjjR«Õ555~~~ íŸ+3€/^¼x÷ÝwGFFΜ9S±mÛ¶¹sçZ¥0ÌÉHNNN?~¼Z­îÕ«—"::úàÁƒÖ¨ ³²5¼ùèÑ£ü±´êêêZ^^nùª03#W€Õjµnµ¸¸ØÓÓÓÂ%`~Æ¿éÒ%!ÄÕ«W“““§L™b•Â0'#xÅŠ666¾¾¾ùùù}úôéÞ½û+¯¼bÊ0#“>¦téÒ¥ÂÂÂ~ýúñ`€„ï«1W¬3ò,‰F£©®®vpphÿ¬X™‘[ çÏŸâÄ !ħŸ~êîîîêêº{÷n«€9¹ŽìééYTT¤T*GŒ1{öl+VœýôÓ#FØÙÙÕÔÔ˜kâ•+Wfgg8p ¨¨¨®®nÚ´i¦÷IJJ*++ûïÿ[ZZöðÃ766š«0@—dä%X)))aaaZ­ö£>B|ýõ×AAAf™8##cÑ¢E!!!BˆÕ«W«TªÀÀ@Súœ?>99¹oß¾Bˆ¤¤¤7ß|³´´ÔÇÇÇ,…@ûq{0ÀmÈÈàyóæ>}úìÙ³±±±Bˆ~ýú­]»¶ý³–——_¾|9<<\Z rpp8}ú´‰},X°mÛ¶ÒÒÒŸþ9###""ÂÛÛ»ýUº0#Xqçwöïß_·|×]wµÖªª*!DÏž=u-...R£)}FŽÙÔÔÔ·o_''§;v¼ûî» …ÂÀtŠÿµdÉ©]ÿm^ùùùF·ª¿IY£Ñäçç·º‰AÝæGa`ÝæGÁŒÀŒÀŒÀ·á‰‰‰ÍBœ0“ŽyCryy¹»»ûÑ£GGŒ!µ8::nذáÑG5ÚgÊ”)÷Ýw_zzº££ã»ï¾û—¿üåÌ™3ÒÑ-ñhÖ'‡[ åpŒà6a¥·@[ˆ›››···.ñ«TªêêêÐÐPSúTTT\¼xñÙgŸuwwW*•)))uuuGµö1:•Ž ÀBˆ¤¤¤´´4•JUQQ±pᨨ(é X999ëÖ­3ÐÇÝÝ=00píÚµjµº®®îí·ß®¯¯ožhƤ\ZZzüøñÒÒR3N¼xñâI“&EFFúúú*Šììl©}Ïž=[·n5ÜçßÿþwEEÅ€ÜÜÜÞ{|œœš±6@×cäFêëׯϜ9s÷îÝÒêC=´iÓ&WWW«Ôf< Àúäð|¬ŽÜ&¬ô ð¼yóšššòòòjjjΜ9ÓØØ8oÞ¼öÏ €•ÙÞ¼wïÞ3gθ¹¹ !‚ƒƒ7oÞb•Â0'#W€µZ­­íÿ…d[[[n'tFFðý÷ß?gΜ’’!DIIÉœ9sî¿ÿ~«€9 Ào¼ñÆõë×ýüüºuëæççW^^žžžnÊ0#CÏ×ÕÕ}õÕW‡*((())ñõõ °Ze˜‘¡wIkµÚ!C†üðÃÖ,Èìø ë“Ã'‚äpŒà6aÏ ) OOÏ+W®´:–‘Ï ýþ÷¿9rd|||ß¾} 
…Ô8}útË€9¹Ž|ø¾ûîS*•®®®³gÏ6KI€.ÌH>zôèÒ¥Kmll¤UWW×òòr³L¼råÊìììÕÕÕM›6Íô>ß|óÍ„ &Ož¬R©rssüq³”èÂl ovttT«Õ½{÷–V‹‹‹===Í2qFFÆ¢E‹BBB„«W¯V©T¦ôyùå—gÏžý /HÝüüüÌR  3r8&&&>>þÒ¥KBˆ«W¯&''O™2¥ý³–——_¾|9<<\Z rpp8}ú´)}êëë¿þúk{{û»îº«gÏž_ýµáéÿkÉ’%RûÉ“'u}òóó5nU“þ²F£ÉÏÏou#0#0‚þ&2o †™x&Žf¢Ûá·`F`F`F`„Î5Bbbb³'ÌD¡Õj l®ªªzê©§vìØ!M·qãFGGÇvÎZXXðÃ?I-ÞÞÞK—.7Ú'66ÖÃÃÃÛÛ{×®]ƒ~ã7^ýõ³gÏz{{·~„ #Çf§H4ÞGûÎí:c\œI3öÞn¶ 2W¬3rØÙÙyûöí%%%‡***Ú¶m[ûÓ¯4¬¢²²R×¢V«¥F£} E|||XX˜R©\¼x±Ý—_~Ùþª]˜ñï !|||FŽiÆGmÝÜܼ½½u—¼U*Uuuuhh¨)}”J¥þ£ÂÒ¥i3^tIF^‚¥ÕjwîÜyâÄ ýû¶ÓÓÓÛ?qRRRZZÚØ±c=<<.\%ÅÚœœœ7n$$$èóÌ3ϬZµjòäÉo¼ñF}}ýرcÛ_  3€“““·nÝm–;Ÿõ-^¼X­VGFFÖÔÔDGGgggKí{öì)**’p[}ž}öÙÊÊÊqãÆi4š»îºk÷îÝ^^^æ-ÐÅy’¸gÏž'Nœ4hÕ 2;^‚Àúx Ö­Í`•^‚åâââááÑþièXFð²eËæÏŸ¯V«­S b$‡‡‡:t¨W¯^Nz¬Sfdä%X?þxDDÄ;ï¼cö—``MF°J¥úöÛo•J¥uªÀBŒÜ=bÄ•JeR°#W€£££'Ožœ˜˜Ø·o_]ãôéÓ-\àW¦|”h»ñ/ÀHÎÉÉéѣLJ~¨ßHt:Fpnn®uêÀ¢Œ< @×ÐúàÌÌ̈ˆˆàààÌÌÌ–[ããã-\fÖfvqq!ºŒÖð·ß~Ûl€NÍÈ3ÀS§NmÖm±b°#øÔ©SÍZNœ8a±b°”6?ƒ”••%„Ðh4Ò‚D¥RõéÓÇòU`fmà5kÖ!*++¥!ÄwÜáååõÞ{ïY©4̧Í,Ýêüüóϧ§§[±,ÂÈ3À¤_@×`$Ð5€²@ÈB›/ÁÒWVV¦Ñht«´X=X„‘|èС™3gë7jµZK–€ù¹:99yùòå5z¬Sfdüè3fX¡,ÊÈ`//¯k×®Y§,ÇÈàðð𨨨„„www]ãôéÓ-\ff$öÙgÝ»wÏÊÊÒo$:#877×:u`QFž–”––?~¼´´ÔÒÕ`!Fðõë×z衾}û>¼oß¾?üpEE…u*ÀŒŒàyóæ555åååÕÔÔœ9s¦±±qÞ¼yÖ© 32ò ðÞ½{Ïœ9ãææ&„Þ¼ysHHˆU ÀœŒ\Öjµ¶¶ÿ’mmmµZ­…KÀüŒàûï¿Μ9%%%Bˆ’’’9sæÜÿýV) s2€ßxãëׯûùùuëÖÍÏϯ¼¼<==Ý:•`FFžîÓ§Ï¡C‡ JJJ|}}¬Sæe$Kˆ¾€N­õœ™™œ™™Ùrk||¼…«ÀÌÚ À...`@—ÑzþöÛo›-Щy ôÔ©S›µDGG[¬,ÅH>uêT³–'NX¬,¥Í·@gee !4´ Q©T}úô±|U˜Y›xÍš5BˆÊÊJiAqÇwxyy½÷Þ{V* ói3K·:?ÿüóéééV¬‹h3K–,Y¢V«›5º¸¸X¬,ÂHîÕ«WËF­Vk™b°#¸¤¤D·|ýúõÕ«Wÿþ÷¿·pI`fšZ#컉n6V)ÇHöññÑ_ÎÊÊŠŠŠJII±pU`6ŠD“ºiß±pèhF¾ÜŒ­­mee¥…JÀrŒ\þâ‹/t˵µµ»víòôô´pI˜Ÿ‘ÿûßÓÓÓÍR  Ë3€cbbfÏž}á­V[PP0gΜ˜˜˜öÏZ^^~ùòåððpi5((ÈÁÁáôéÓ&öÑjµsçÎ}íµ×ÜÜÜL™Nñ¿–,Y"µŸ¼ý%º0#W€“““ǯV«{õê%„ˆŽŽ>xð Y&^¼xñ¤I“"##}}} Evv¶Ô¾gÏž­[·èãàààó+///!DïÞ½{öìi–ª]•‘'‰ÝÜÜJKK»wï>xðàsçÎ !zôèaöw„Z/ÁdŽ—`YdF^‚¬È\±ÎÈ`GGGµZ­[-..öôôlÿ¬X™ñïÇÇÇ_ºtIqõêÕäää)S¦X¥0ÌÉH^±b…¯¯o~~~Ÿ>}ºwïþÊ+¯X§2ÌÈøw€·oß~éÒ¥ÂÂÂ~ýú™ë;ÀX™‘ÜØØhcc#½rYj¹y󦓓“å ÀœŒÜ=vìØ+W®èVóòòÂÃÃ-\æg$÷ë×oذaû÷ïB¼÷Þ{O>ù¤U ÀœŒÜ•••••vþüù]»vEEEY§2ÌÈÈ`!ÄðáÃûôéóí·ßŽ1âž{î±BM˜‘¼iÓ¦áÇ?ñÄ/^Ôh4ááágΜ±Ne˜‘‘[ SSS?þøã±cÇ !>ÿüóåË—ß{ï½UUUV© E¢IÝ´ïX¸ 
+2€sss½¼¼¤å;î¸ãÿý¿ÿwß}÷Y¾*̬Í[ ¿øâ‹¦¦&)ýVTT455 !‹‹‹­WfÒfŽŽŽ®««“–ï¼óΟ~úIQ__?{öl+•€ù 4] `€,z ô‚ lll„?ÿüó’%Kœœœ­UæÔf7nÜ?þ(-GEE]ºtI×nº0«6ðçŸnÍ:°(žÈ `€,€²@È `€,€²@È `€,€²@È `€,Øvt€<Äřԭ÷v ×ÈW€²@È `€,€²@È `€,Øvth[\œñ>Û·[¾è ¸ ®À­àR @§E0€ÎŒ[ ²@È < ´I‘hR7í;®€9p `€,€²@ÈB‡àÆÆÆ xxx8::ÆÅÅ•••™ÞçÅ_:t¨ƒƒƒÏ3ÏÉÉÉéééÖ¯йtÌàòòòË—/‡‡‡K«AAA§OŸ¾Õ>Bˆ#GŽ„……žNñ¿–,Y"µŸ__ßÚÚÚ_~ù¥£ŽÐYtØ[ /^¬V«###kjj¢££³³³¥ö={ö%$$´ÕçæÍ›iiiBˆÁƒK»ØÙÙÕÖÖvÐq:‡ À666«W¯^½zu³öÌÌLÃ}œœœx©€>±kv¶¢{‡ýÿ>¸]ð?„žósÆûðbv C–LûÊ‹0í£D€Ã'‚·‚ Ì„8*„0ûçÇæÓaoÀšÀYàh ¦ÜÍέì@çÂ`üì½y\•ÕÚÿ¿6( ;ÄGp894‹'r@Ë13µÁ2ÒR½ÌS¨œæŽOÚà@*IV¯èÒƒ}ÔÒN–Á1³Ô4ÐÒ@¡@7£‚hm¼Ü¿öÃÃcv¯µö}]Ÿ÷_››^¼ä³¯µÖµÖ` €hØ™Ë9r×Y!V€0`,@ 4@”ººÿã?ðó¾¾J¾À+ÀÏåìWBl´n˺z#àõ` Ê\ÎU®·¹0€:0 ‚€`Z ¾ !+0^Ž*È`pÞ:½œe ¬Q.À^Ïå G7l¾¾ò¿ƒ·Î»€ÿA.§JS_¢ýú«hÖLµì †ÜKp 4` €`ö€Q©»}¯‘Çðpß ´@`V€€ ƒõXú¹œ«ûZZzoŸz£B0`0ÀŸ`€Rì{d°;ƒ+K1{C°°`,@ 4@8 è+ÀX€`)¨? GRÀ.ç ÐÊ/µÖà À0åW·8ëþ?þ› %ßÊ•ùðÁ¯@}OŸÿ ÕFô‚0àOs9K£Bˆ–X:ÁÖà$F€&Žp!0ù ì‚úé~,0x3¸ 0À´@¬ãr¶a¯ÐÀÐÃQ€0`†ú’q&3Þö`ÀX€0` €`ÀX€°õ<÷ÜsŠ‹-Rl\õ! Þð*8Ä‚` [ÏóÏ?¯Øˆ0^…úPoxbA° €°À–àúúúÙ³gGDD8Î1cÆ?~\÷wÀÛ±åxÑ¢EYYYŸ}öYaa᯿þz×]wéþŽx;MtWÂêÕ«çÎÛ³gO!Ä‹/¾Ø½{÷C‡ÅÆÆêþ¾x1†Ý(++Büç?ÿñ< ÌÊʺدû àÏbÉpÒ~+À'OžB„„„xž„††š/ÆÀ„÷ !ª««=OªªªÌ‡ÀŰß8,,¬mÛ¶ß|óùá¡C‡NŸ>Ý«W/½ß/ÇaÇá ¬]»vóæÍIIIÕÕÕÛ·o×ýMðjì·Xñü£ªª*>>¾®®.111++K÷wÀÛ±å 0ðG±ß`à À |Ÿ{î9ÝßÞEQQÑþýû}}}•]¯#ŒàbÔ××Â#ŒÚLàð¿Fíb”‚¬à†K£]Œ§N;v¬Ãáp8BˆqãÆÕÔÔÈÁã•Ñë’È0ê’š,^¼ØÇǧiÓ¦›6m2 #++kÙ²e0££ú7e-Rÿ+a„Ñ.FIà,kHOO¿Äg§L™£-ŒsæÌÉÏÏýõ×o¾ùæO>ùdêÔ©±±±K—.µ\#ŒWFFF†ùâ§Ÿ~Z¹reRRRçÎKJJÖ®];f̘—^zÉr£.©I×®]?ûì³½{÷¾øâ‹Ÿ}öÙ‘#GFŒ‘ŸŸ#Œ0ª7ªSÖ"åð¿Fíb”…î8^DÇŽ;fFll¬a%%%íÛ·‡F/4ÆÇÇïÞ½Ûóá‰' $Õ¨EÚ¯_?Ã0êëëÛµkgƹs箺ê*a„Q‹‘ þW£]Œ’À!XVRPPðÅ_ÀhSã‰'¢££=¶hÑâÔ©S0Âè…Æï¾û®wïÞž#""Nž<)Õ¨E\^^îãóÿ¿U:t(44FaÔb4Q_(–rø_ #Œv1J`k8qâÄÀ;vì8hРŽ;<ØårÁh;c‹-*++Í×õõõóæÍ»é¦›`„Ñ :uZµj•çí[·ò÷³¨—vëÖ-!!aÙ²eµµµ¯¼òÊÈ‘#'L˜#Œ0j1ªSÖ"åð¿Fíb”ö[Ã=÷ÜsöìÙW^y¥]»vÇŽ›9s¦Óé|çw`´—ñþûï9rä¸qã"##ëêê:tèðÿþßÿ‹‰‰Fo3îØ±cøðámÛ¶5·ã>|ø£>Š—gÔ"½ñÆÍ‡#<<|àÀ3fÌðóóƒFÕÕ¿)k‘rø_ #Œv1J`kˆŒŒüá‡BBBÌ«ªªâââJKKa´—±¾¾Þ0Œ&Mš|üñÇ‘‘‘ùË_|}}åé`„ñÏàr¹²³³‹‹‹£¢¢ÆŽ.Û¨K ðÔ¿)ë’hƒhkp8õõõžëëëÍÛP`´—ñ‹/¾hÒ¤‰bèС½{÷öõõ-))F/4 !"""† 
’˜˜8dÈeQÅÒï¾ûî‡~0_üøãôŒ&EEE;vì(**R£ÓblȬY³`´Qý›²)b‡’±!ôþ$y­A×é[ĸï¾ûn»í¶¼¼¼ºººï¿ÿ~èС“&M‚ÑvÆ   ùó盫y†alݺµeË–0Âè…ÆãÇ0@a¼ tâÄ ©F-Òo¼ñ“O>1 cãÆ~~~+V¬ f¬®®=z´Âì=ztuu51cNNΨQ£®»îºk~Çßßß|£Œêß”µH;4Œþ$9%°5¸\®[o½Õ3­0lذòòrmg-iÿŒ2Œjjj=q»Ý0Âè…Fò›ñøðÓO?}þùç?ýôIc—.]Š‹‹?üðÃ[n¹Å0ŒÃ‡ÇÅÅÁh£–7e•;0z¿Qÿ)¢¢¢~þùgÃ0ºtéRVVæyîr¹ºté£]ŒØÊÊÊýë_ô¤7\Ã0Ö¯_o¾ `T¹”zc¿~ý 误o×®açλꪫ`´‹QË›²ÆJ±CÃHøO’•QMdï1¦ÍÈ‘#Ç¿`Á‚Ç{ì®»îzúé§Û·oôèÑyóæÍ;F»{÷î}‰ÏîÝ»F½ÄسgÏæäälÛ¶mÛ¶mß~ûmÇŽï¸ã©FõÒ)S¦\ðy÷îÝ/ö)ÛgÍšÕ¼yó£G¶k×îØ±c3gΜ5kÖ;ï¼#Ã¥Ë\^^f~xèСÐÐPy:­Eý›².© b‡†‘ðŸ$+£,tÀíÍÉ“'§OŸ~±ÿ÷0ÚÅøöï<ÿüó‘‘‘)))éééóçÏïСƒ¤ƒ(`„ñO’˜˜Ø¬Y³ÈÈÈ»ï¾ûõ×_7ïæ•)yÔ_.¥Þø÷¿ÿ½GK—. {ùå—;wîüøãÃh£ú7e]R> v`´‹QÃ0..àqêÔ©³gÏ6z£½Œýû÷_¶lÙ5×\c~èr¹î¾ûîmÛ¶Á£·###Ïœ93iÒ¤ÄÄÄ[n¹%((HžK¯”<­ZµÊËËkÑ¢…ùayyyÏž=KJJ(o¼ñFó…Ãá8pàŒ3üüü`´—Qý›².)y;0ÚÅ( €ø‚ƒƒ+++}}}=O®¿þú]»vÁ£· Ãøî»ï¶mÛ¶uëÖ;wöèÑcðàÁÏ?ÿ¼<£é›Ì¥ö–«7Þÿýeee/½ôR‡~úé§Ç{¬U«Vo¾ù&%#6±CÃÀ%ðÑý Ûžž^__ßèá–-[ämáPoìÔ©ÓªU«<nݺUö Œ0^‡£W¯^?þø›o¾ùÊ+¯¸\®yóæI5j‘&7`Ò¤I¿þúëðáÉ_~ùeÃ0ºwïÞ¬Y³=zøúú¾üòËÄŒØÄ #—+Ààrq8uuu fee-_¾<''‡†qÇŽÇoÛ¶mçÎKJJ>üÑGÅÇÇËpÁãŸaÓ¦MæIT?ÿüó€;vì(ϨKÚ—ËuÏ=÷lݺ•ž±¤¤¤¨¨(::ºuëÖ²]ê–Ô8y‚ر©‘ß$£$p 4ølÞ¼¹iÓ¦ Ÿ”••íÛ·Œñ¯ýë‘#G²³³‹‹‹GŒ1vìXÙ`„ñÊ>|xppðœ9sfÍšåt:¥ºôJ!uϘãwß}еk×Ö­[üøã]ºt!fLNNö¼v¹\kÖ¬3f Œ¶3ò±cS#‡?IFYh8x‹"§Njx¡Ù¹sçJKK‰…¾Œ‘§N"oäÀ[o½5qâÄÖ­[ûûû8ðŸÿüç®]»èIëPYYùæ›oöìÙ“˜ñÆoüä“O ÃØ¸q£ŸŸ_@@ÀŠ+ˆqâĉÁƒÃh;£úÂC‹±CÃØ’’ V°5L›6ÍsëIyyy=„;vþøã!C†„„„<òÈ#擞={9r„’‘ü~õóiÒDCžË–vîÜYê×÷cLLÌÆ].WÏž=CBBjkkýýý‰›5kÖðÃèèè·ß~FÛÕZ¤ˆF’Œ’ÀØš6mzîÜ9!Ä™3göïßß·o_!DxxxMM £çPŸŠŠŠ“'O{.4'c¬©©©««‹ˆˆ0?4 ãĉ‘‘‘”ŒBˆ˜˜˜mÛ¶%$$øúúNŸ>]QTT$õw«Þ8nÜ8y_ÜKŒ€óæÍ»óÎ; ÃØ°aƒâû￟8q"1ã?þèy$;â`”„úÂC—”<ˆíb”…ÎågB\wÝu«V­2 cíÚµQQQæÃ¼¼¼N:‘1ºÝîùóçGEEyþñDEE-X°àü­ö5²Ú˽ÿþˆˆˆñãÇ/[¶¬[·nÓ§O§gT†z# Dmmmuu5m#y8œ/¨¾ðÐ%åb‡bG[Ã;ï¼ãããÓ±cGŸÔÔTóá+¯¼òàƒ’1&''ÇÄĤ¦¦ææææåååææ._¾<::zöìÙdŒörûûûŸ9sÆ|]PPpÇwtèÐá¾ûî“÷¶¤Þˆ0@1‚Áù‚ê ]Rl‡Ø‘À–ñïÿ{Á‚›6m¢j ;xð`£‡û÷ï'clÖ¬ÙÉ“'Í××^{mVV–aEEE”Œp¹\äLî#á`4 Î4t”:ꥈ`˜ÄŽ 0—K```yyy£‡'Nœ $c 6'¬««ó÷÷ÿñÇÍ×dŒ¬(///((8ÿ_#“ûH8 „ùùùÑÑÑ o7-))iÖ¬#;À. 
v®‚e ·ÞzëÅ>µeËÆÄÄĤ¤¤Å‹ÇÆÆšOòóó“““‡ "C§Å»nݺiÓ¦­[·.""Â<˜±   mÛ¶dŒ¨¯¯_´hQZZZqq±ù$**jÚ´i)))¾¾¾4ŒÛ·oÏÈÈ0_/]º4$$ä›o¾Y¶lÙœ9sÞ{ï=F-RFà|AFõ…‡)b‡ˆ2F)è“áÑßy衇Í×3gΔ÷Vo,--MHHB8Î6mÚ8N!DBB‚¼FõF{¹9Àa¿º–æy{Ô9ô[rXR ÎT_xh‘"vh;4Œ’ÀØbJJJ"##Í׿ýö›‚)ÅÆüüüÌÌÌ×^{-333??_ªK‹‘ü^npد®¥yžÃõFUΤaô ¾ÔQ)EìÐ0"vh%°ÅTWW™¯«ªªäíVÕh¶†Ã¼/‡ýêLî#á`äP¥qXRc…–ÂC™±CÈØ—`뉌ŒÌÈÈ0÷öîÝ›¤‘*C/#‡yßÑ£G5ªá’l^^Þ°aÃn¿ýv2F&÷‘p0r¨Ò8,©qCKá¡FŠØ¡aDì€K€°õ¤¦¦ !|}}›4i²aÃ’FªpØËÍaÞ—Ã~uƒÇ}$Œª4KjÜÐRx(“"v;à`,…½{÷®[·îü=”Œ´!¼—›Ã¼¯ ùýꀪ4Kj ÑRx Ú±Ä #¸b†aÈ}z``àK/½$„¨¨¨¸ù替ÿþûŽ;nÞ¼¹k×®4Œ iÕªÕK/½t÷Ýw/^¼8++kÏž=4Œ×_ýƒ>8mÚ´7Þxã™gž)**Bäçç9òðáÃ4Œ˜5k–ùâôéÓï¼óÎC=$„0 cùòåòR]½”ƒQ J#«9bÚ vÈɃعb0—K·nÝ222®¹æ!Ä3Ï<óé§Ÿ®X±bÙ²eÕÕÕ’.jWolÈ«¯¾úÈ#øúú:ŽÌÌ̱cÇÒ02™÷åCiiiïÞ½KKK…n·»iÓ¦ R]½”ƒ‘0–Ô¸Ís±ck;4Œ’h¢û¶áèÑ£žÜ›7ož={vß¾}.\اO2Ɔ̘1£ÿþùùù}úô‰%c¼çž{Z·nÝhÖ3ýLÃÈŠÀÀÀÓ§O›¯kkkIJ 9Tiqqqæ‹ó¸dè´·oßž‘‘a¾^ºtiHHÈ7ß|³lÙ²9sæHš±UoäbÇÖFÄ £$°lWjjj®ºê*•Fìv[Súv}-RªF&ý–&„—Ô?$„¸îºëfÏž=~üøâââ>}ú¸\.F-¨¯v‚ر¯ÑbÇÖFYÈ8Y‹!kÖ¬q»ÝnÞ¼9##C’Qñøã7z˜””4wî\IFgrwÓ06„ö}$|Œ„Ÿ÷P]]d¾®ªª $cäp¯ŒúRÇÐQí4±CÀˆØ±µQ>:Ýy衇Ì?ã†TWW¯\¹Ržtýúõ .løäž{îùè£$éfÍš5cÆŒN:M™2%%%Å|øñǛןÒ0r îwÚµk·cÇóullìÇLƸ}ûö{î¹Ç|íiÑéß¿ÿœ9sÈ2cÆŒ½{÷fdd8p@Áfu]RòFÂý–‚ƒƒßyçsçέZµJÁž1eÆØØØuëÖ !Ö­[ѹsg!DAAAÛ¶mɵ”:ByµÓÄ#bÇÖFYèA‘ýÁÿ&55ÕétÊ3æççGGG¯X±Â󰤤¤Y³f’Œƒ«áYAxÞ—Ï]Ç€‘‘‘õõõ‹-êÝ»7I#Õ%5—Žª/u MÕ+;ö5rˆI`l æ¿ò "ÏXWWwèСV­Z­\¹Ò|¸k×®¶mÛJ2œ:uJ—MA¶6’P­Ò±wïÞuëÖûì³&LX¾|ù¤I“F%ÕH›[/#“ûH8Ü0d ©gçr |¯ŒúRGè¨v;àÊ@ìØ €-CqxíÙ³'11Ñ|°qãÆ&Mš8Ž´´4IƆ<ýôÓ'Nôóó{úé§Ÿzê)’F5øûû;!DÏž=wíÚåv»—.]zíµ×*8‹0î:BlÞ¼ùÃÿMYYÙ¾}ûä…üqZZšy†§IÏž=9"ϨþÇTÿ3*F}•–žž^__ßèá–-[ÞyçIF-C5ÚsÄêÇiZªÄŽå v¤B;v¤ { šBù=u~~~æµ+†a>|Øì˜­««Söÿ”äò.—KÞê!Ü$xÜG¢þÇTÿ3rè·4«ffföë×Ož±]»v ,høpëÖ­={ö”dlÕ{eÔ—:†Žj±#ÄŽl¨ÆŽ$0¶õáÕ¾}ûÿüç?æë>ø OŸ>†Ú0I˜lpR î:–ÁßÚeã‘îß¿?""büøñË–-ëÖ­ÛôéÓe•á%?#±*M‘ýÁÿ&55ÕétÊ3ªª5„ä±úRÇÐQí v$;²!;’À`ÉÔ‡×Ì™3ûôéóÅ_lÛ¶­k×®O?ý´ðŸFý ebbbZZšùzÛ¶mS ÉÈaÞ·!‘‘‘õõõ‹-êÝ»7£–°¿¿ÿ™3gÌ×wÜqG‡îˆ<ã& IDAT»ï¾êêjIFõ?¦–Ÿ‘|•ÆaIêKCGµƒØ‘dDìïƒ%kP^·Ýv›Ãáp8wÞygMMan·û•W^‘dä€úÊðððŸ~úÉ|ýå—_¶lÙrÆŒæÙ’Œæ}Bµ)ˆI»>‡“C•ÆaIêKCGµƒØ‘dDìï÷Û›“'Oúøøàú8«P+¯¿¿ÿ‰'BBB„GŽIHH(..>sæŒ9'*È»Ži™ÜÉáÇt8uuuê8pà€âããW¯^}Ë-·H:çVýÏP]]íïï/„(,,|üñÇ÷ìÙÓ¿ÿÔÔÔàà`eß° •ÕbGª±¼ 
€­¤¼¼¼yóæ>>JÏÖ>sæLmmmHHH“&MTz þMÈŒËcÇŽ%$$<óÌ3Ó§OB|ýõ×cÆŒ)..–aŒ‰‰ÉÌ̼ñÆ…~øá3Ï<óí·ßÊûúú^ðS’n{Rÿ[å€ÃáxüñÇ_|ñņ|ðÁ–-[Ê;B|È!wÞyçÃ?,„øôÓOׯ_ïùÔêÕ«eÕÿ˜Z~FòUZYYYxx¸Œ¯ Ô£¥Ô «ÄŽ ;À»Ð»M†ƒšK=:t8|øð[o½Õ½{÷Ñ£G—””È“¾úê«Ý»w7/ÑiÚ´éÀwîÜ)OÇaï¨`°—[ )ˆBGc¹–Ž}ò»8ô[â|A¨ÿ­j)u åÕb‡ˆù­bl #FŒ˜8qbnnîÝwß°bÅŠøøøqãÆI2.X° mÛ¶Ï>ûì³Ï>Û¶mÛÇ{lÆŒN§sÇŽ’Œ8Û;Êa/·ú°–mcälî#!Ùz⨟ÍÄù‚2P_ꚪÄ;2Pÿ[•ÀÖТE‹ŠŠ Ã0Nœ8!„øùçŸ ÃøùçŸÃÂÂ$Û´ióÝwß™¯¿ýöÛ¿üå/†a¼üòËýû÷—d Î ÔEuuµ²w#ó¾œî#¡}Ç*Ã’‡9bõ¥Ž¡©ÚAìXbGbG[ÃUW]uöìYã÷À2×ñjjj<7‚Ê3†qúôéàà`Ã0Ž= É(œhRVVV__/Uq>uuueee¿ýö›l‡y_õ¨ÿ­ji,ר±O{W‡*ü’‡9bõ¥Ž¡©ÚAìXbGbG[Cß¾}W­ZeÆŠ+Ú·oÿè£}úôU«VíÙ³çóÏ?—aŒ=zôœ9s ÃX´hч~˜››{ìØ±Þ½{WTTÈ0:Žººº€€€ 8pÀ€ñññ«W¯¾å–[V®\)Ãøè£îرcÙ²e¿þúëßþö· &Ì›7ïÌ™3æê¨ £çg”ñÅ/mTö[9rdhhèßþö·+VüòË/Bˆ;ï¼333³M›6ï¾û® £báÂ…«V­š2eŠ"==}„ gÏž}óÍ7·lÙÒ¿Fõ?¦úŸQ'Ožôññ¹êª«”ËËË›7oîãã£Ì(„8sæLmmmHHH“&M¤ŠjjjTþ2õ¢¾ÔQ)Eìȱc-¬bÇb4¾I¢þWªÌ(ìå°—›Ã¼¯`ÐÄä>»Lh÷[rXR Îô ¥zT&EìH±c!¬bÇZ0¶Ú`ò{G {¹u¦ kar ‡] ª4œ/(õsÄh€;’@ìX«Ø±›}»ÞOee%U£`°wÔá½Ü&´ç}9ìWgr ‡æ8Ti–Ԙ̛¨/uTJ;2@ìÈ€UìX‹Í¾] ÁàÌ@„Wò9Ìûrh br ‡] ª4Kj¬æˆiƒØ‘bGˆ+‡`YCïÞ½/ñÙ½{÷0TWWûûû ! üñ={öôïß?55588Ør¢²²òÞ{ïݲe‹bìØ±o¾ù¦Ó鬯¯_±bŬY³d=>ÌŒÃñ‡£]»v?üð“O>éy¸mÛ¶¿ÿýïû÷ï—aŒˆˆØµkW‡„999wÜqǸqãêëëW¯^-ïë?þ¸oß¾~ýúµiÓÆ0ŒŸþ¹U«V²Üøå—_¶mÛ&„4hPÛ¶m¥ºLÔÿ˜ŠÆ   òòr???ód¸šš§ÓY[[Ûºuë“'OJ5 !êêêZµjU]]}ìØ±ØØX³Ž±œ/(èþ|Aõ…‡.)bGžQ v¬ƒCìÈBÓÀ›Ï=÷\ëÖ­Ÿ|òÉ5kÖ<ùä“­Zµzî¹çÞþF]¨Ü;êAýŸ†2#‡y_Á©)ˆö}$Œú-9,©q8_PKᡱÚAìXbGbG[C||üž={<~óÍ7ñññÄŒ&´÷Žz ¼—MA2ÐØ¤e“ð ‘z#‡*ÍÀù‚ÒP9G¬¥ðÐUíˆKAìPBËÒ”µ`l AAAn·Ûó¡Ûí–z«#‡½£à0ï+°_œ”¶‘O•FxIÍ„ö±úÂC—Ô±c-ˆIÐŽI`l ½{÷67û™¼úê«æ%#‡3{]F󾬚‚0&c$_¥¤ÿ?r˜#V_xè’šþçê±ck#‡Ø‘Á²†;v >¼}ûö111EEE›6mЧd ;|øpóæÍ].WË–-þùç6mÚüòË/W_}uYY™ cÛ¶m·lÙò—¿üE±gÏžI“&}÷Ýw¯¼òJvvöŽ;dŸþù´´´|0&&¦°°ðõ×_Ÿ6mZ§NÌÏNœ8‘€Q08ÞC'Ožôññ¹êª«TJ«ªªBCCUµH9 Ÿ½ÇÁÈá|Aõ…‡.© bF/7rˆI`l.—+;;»¸¸8**jìØ±áááČΠ4µöœ9ùí·ßΜ9óË/¿”áÒeô‘‘!i€í%ÆòòòæÍ›ûøø(3 !Μ9S[["û4f@ÂU#‡9b¡£ÔÑ%eá?IF&±# €ÁåÂá@ùàààÊÊJ___óÃúúúæÍ›KÞë2z ü–pèСѣG:t¨C‡[·nÍÉÉyá…ºté²zõêV­ZI’®\¹òÕW_ÍÏÏ7 £iÓ¦ýë_ÿùÏÞpà 
’tLî#á`ô@øOÒá%5sÄL@ìH±c!ˆ+kÖ`n¤lÄÎ;7lØðâ‹/îܹ“€ñ©§ž?~|rrrÓ¦M¿üòË)S¦,[¶,""âÃ?´Üe²dÉ’Ûn»mÉ’%Bˆ³gÏnÚ´IqðàÁÑ£GK2vêÔ)==}êÔ©æ‡iii;w–äÒeä@rròu×]÷Æo¬X±bòäÉBˆéÓ§gffΜ9SeSРAƒä53æÍó’P/å`ôPYY©À¢×¨¾]_™±k×®k×®6mZzzzûöíŸ|òIsޏOŸ>’ŒW_}õÒ¥KÍ9âW^y¥GæófÍšI2ª/<´H;ÄŒˆ Q;’À °5¤§§ŸÿpÊ”)yyy¹¹¹fYlw£`°w”Ã^n„ç}94iižç°G@ã®ÂpXRÛ¸qãøñãýýý=sÄ;wî4爯¿þzËuBˆÜÜÜÛn»Í\Š1çˆû÷ïÿÉ'Ÿ¬[·î7ÞaÔRx¨—"vh€Ø±\'tÄŽ$0Ú{GÉïåö€¦ F¡ª)HKó<‡=êª4œ/( &ç *±ƒØ±‹Q v®©gLsæÑG¥jTÿÏÿPÁ‚Ã]ÇLî#á`|î¹çZ·nýä“O®Y³æÉ'ŸlÕªÕsÏ=÷öïÐ0ÆÇÇïÙ³Çóá7ß|/C¤Ñèý2Ô—: ¤ˆFÄ1£µ`Ørss_xá…’’’sçΙO8гgO!ÄîÝ»i=nå°—›Ã¼/‡¦ &÷‘p0rè·ä°¤æð[¤–ÂC½±CÈØ!f´{÷ÞC·nÝFg^ -„˜;wî /¼ „xàh=þ3ã°—MA’PßÄä>òFUZŸ>}¦M›æ9íoåÊ•éééß~û-%£Âo‘Z -RÄ#b‡˜Ñb4¬:S$44´Ñ“N:3z¨¬¬T#Òh$ š‚ˆÝáÐoùÅ_õìÙsĈ=zôþòË/‰=¨¯¬”µ« vh=ŽIØ|øî5$&&nݺµá“qãÆ­_¿ž’‘'³fÍZºt)%#‡y_„çD™ÜGÂÁÈ¡ßR0XRó@ø|A-…‡z)bGˆ©ŽI` .{G9ìåFS #“ûH8§* ï±# Äð0— ‡½£ör3™÷5!<ÀFpXRã0G €@ì v.Ä#a1¶nݺiÓ&Ï?ý;î¸cæÌ™Ï>û,%cii©9øô°`Á©‡Š©7þõ¯=räˆ9 ;bij°ê*++Õˆ4=¨o××"¥gäP¥]l «{÷î’V·ÔÇŒs‰[F¶ vìhDìÐ0J‹àrá°w{¹]`r #“~Kòp¸W† ˆÄŽ]@ì\1ƒË…ÕÞQÂp˜÷åÐÄä>FžÐ[Rã0GÌÄU;v4J`p¹°Ú;Jó¾ö«7oÞ¼Q»uçÎ>l¹H¯”ƒñ|èUi–Ô0GLÄ #b‡†QØ .V{G s‰=*Ý»w§aä°_ýÚk¯mô¤OŸ>òtº¤Œ¬™ÌŽ2•UšTãäÉ“Guûí·7\àzä‘Gd¸t—/_>|øðÔÔÔ†3¶òtZŒL@ìÐ0"vh%`¸CoÞMAÀFpè·d²¤†{e€]@ìÐ0 ÄΕ‚0¸\8ìåš‚h8Ti8_ §OŸÎÎÎŽïСm)m;4ŒàŠA ´5¤§§'%%yÖL¶lÙR^^~ï½÷Ò0r8Pžh ¢adàÐoÙ¨(BÈ. 
Õ™ÌWTTLž<9<<ü‰'žøÛßþæIu©h‘Ò±CÃÈ$vd€`kp8uuu fee-_¾<''‡†Ѐü¯@S@-ÎB÷îÝ{ß¾}Ó§O?uêÔ믿ޱcG"íR¼&±# €­Áápdgg7mÚ´áãGÎ;·¦¦††ñ|èíåš‚\æX´¬¬L±nݺ'žx"99Yöª¬)€0[ƒÃáhÔìÁívÓ0rØ; h€¦ €vèÍ—–––”” <øûï¿7Ÿ”••Íš5«¾¾þ³Ï>£$À¦Ð‹I`°eÔÔÔ4jH&fä°w”Ã^n`¿:@1î•iݺu£ Ð"ÀpˆI`Ø.¸#—˜‘ÃÞQìåpp¸WfïÞ½'Nœ˜0aÂùK¯ž[ÐiH°bG[CYY™â3oÔ9ìå¹—›h H…ñ¢ººúå—_~þùç¥Z´KÕ_¼„«žÀÀ$vd€0ÿ‡½Üà°_]Kó<‡=Ø•® sÄ|(..îÔ©“Ê‹—Ô;W ÀÖ0eÊ”¡C‡Ž7ް‘ZÙ9À¡)HKó<‡=Ø•ÀÅÐRxh‘ª¿x W=É+ùàbà,k(,,lÔ@Ï€](--5Ÿ,X uwŠz£bóæÍšçËÊÊöíÛGLÊÁH¬äÓ@KᡱÚiÛ¶íûï¿¿nݺAƒ©¹xI¥‘Ãà°¢¢bòäÉ*×ÕÕÁb~Çår‘7r`ðàÁžÜyçÄŒBß‹@IÊÁ¸fÍ·ÛÝèáæÍ›322È…uuuffföë׌¡¤¤äÛo¿mÑ¢EÉïìß¿РA·Ür £aEEE~~~mÚ´IMM=wîœ<‘^cXXXqqñÈ‘#o¹å–#GŽÐ3‚+-ÐSQQqòäÉààà-ZP5´£¥yžÃ/1kóÆù‚ÄÐRx(“^bÕNRͬÞ(x´y›Æ²²2!ĺuëžxâ ÙëêêàÊ@ ´5Ô××/Z´(--­¸¸Ø|5mÚ´”””‹qd;#8ìåF/"Þ‡6oõ'VàŒ ËÑRx¨—îÙ³çb/IB½Ñá6ïÒÒR³É®´´T1`À€>ø`Ö¬Y6lô{VoW ÀÖ’’²aÆ”””¾}û†††VUUíÞ½{É’%ÕÕÕ‹/¦a䇽Ü=ôÐĉÕ ÕÕÕ+W®”4Uoä€ËåRvšz)£à18¬©©Qü‹Uo$–ÂC½´wïÞÕÕÕ3gÎTvϰz#‡ÁaëÖ­½z#¸bÐm ááá999±±± 8p`À€.—‹†нˆx^Òt #ø?ÑRx Ú‘‡6ï½{÷^l]]ÒDƒz#¸b°l uuuFFFž>}šŒ‘´÷rsXn"“ûH89Àd%Ÿ¸b|tDHLLLJJ:tèçI~~þ<0dÈ2ÆôôôúúúF·lÙòÎ;ï1Ö××/X° :::,,¬C‡aaaÑÑÑ .<ÿÛ°¯QQSSã¾”Œäar #‡Áaxx¸J#ÔZ¤b§wïÞ7Üpƒ9Tk£"$$äù矗÷õ½Áxúô錌Œ‚‚ÂF)è:~𥥥 B§ÓÙ¦M§Ó)„HHH(--%c î±HNNމ‰IMMÍÍÍÍËËËÍÍ]¾|yttôìÙ³É/ø[•Šz#À«˜Fý–Û¶mS©Ób<®³:JÅRÄ #“Ø1!|•,t/ASCý¯T™Q0h=zô¨Q£<èy’——7lذÛo¿Œ‘CS‡^DõÍóZ¤Œ¬(///(((//§gBø^2ƆjÙ RÄ1ÇNIIÉ·ß~Û¢E‹’ßÙ¿ÿ Aƒn¹å2FI`l1ÛÚÈa/÷ AƒÒÒÒ$}q/1^ð_±ýêaaa çMLöïß.ɨEÊÁèp•æv»çÏŸå™|ŠŠZ°`ÁùÓFö5rx‹l¨&,EìÐ02‰Å œd–TÑ .G•FFFnß¾]å^#õF&MAä{™ÜGÂÁÈ¡ß2%%eÆ )))}ûö ­ªªÚ½{÷’%Kª««/^LÃȀءaä;®³’…î85*++É9ðöÛo“7ž÷ zÕ7Ïk‘r0rè·ä°¤ær¹$}eï1zÐRx(“"vh9ÄŽaUUUÏ<󌼯ï F` .{G=¨ŸRfdÒD¾‘É}$Œª´ÀÀÀóg¾Nœ8HÆȀءaDì€K€hk(..~â‰'¾þúëS§N5|HÉÈáÌ@ph â€úæy-RFý–‰‰‰III‹/ö\ù–ŸŸŸœœ»wï^Æó!<æ°—›Ã¼/‡¦ &÷‘p0r¨Ònºé¦;v¨\àRoô@ï-RKᡱÚAìÐ0"vˆ­-ЊäädóÅO?ý´råʤ¤¤Î;—””¬]»v̘14Œ¬àÐÊŽ¦ F-Íó„Ûæ59ô[Þu×]III*¸Ô £¥ðÐXí vh;àØ{øî=ôïßÙ²e×\sù¡Ëåºûî»·mÛFÉègÚó¾Ð¼ý––Ôª©.Åh)<4V;„AìÐ06TSIØû»÷‚ƒƒ+++}}}=O®¿þú]»vQ2 )ˆ†‘É}$Œª´ªªªóJÖTol¨¦:G¬¥ðP/Eìȱ#±# ´@[C§NV­ZõÈ#˜nݺUv¬ÞHxï(«½Üh ¢“ãX99ô[ª¯“4Vf„Xõ…‡)b‡†‘ð_¢7¨m=úX¶Š;v >¼mÛ¶æ•ÇôÑGñññ”Œ„Ï ÌÈÈ0_\p¯ÑK/½DÀèüoC5Õ`l‡%5ÂsÄÔº¤€ˆ¨7Jµše¸\®ìììââ⨨¨±cdž‡‡3rØ;Êa/7š‚ˆÝáP¥¥§§_â³2–¿Ô Ï7D}©£KJÄbÇ.FI`l 
ß}÷]@@@×®]…n·»K—.ÄŒöŽb/7ðr˜ÜGÂÁèC•ÆsÄê •RÄŽÄŽ]à;’À`k˜:uê¼yóºvíúÞ{ïM˜0ÁÇÇgÉ’%ž-+4ŒöŽrØËÍaÞ—pS“ûH8=pØpÈ÷ʨ/ív»ƒƒƒ©9ì%¼—MH7 IDATAR³NKS“ûHÈ ÷[zèÓ§ÏäÉ“.p™‡ÆQ2r8_Pè(u´H;Œˆ2±#´@[F`` m#‡3}||Ξ=»sçÎýë_!!!²›=”ÑD̨¥yžÃõFÂý––/_>|øðW_}µáiÄŒÞ"…ŽRG½±CÈءa”…ÆÕg¼>}údgg§¥¥™-Ö555-[¶$fDS Ô·ëk‘r0rè·4 ãĉ«W¯~ê©§V¯^ír¹dë´ ;4Œb\´@ƒÿgzøðÃï¼óNÃ06lØ0räÈ]»veeeImÖUoDS¦ &÷‘p0 ý–æW×®]…n·»K—.ÄŒ€ ˆFĸƒÿV{Gƒ½Ü¸ë˜Æ~u&÷‘p0r¨Ònºé¦yóæ%&&¾÷Þ{&LðññY²d‰gRÌÖFVsÄL@ìÐ0"vlm”ŽÞh`#8œÈ4@}ó¼)#‡~˺º:Ã0 ”‘‘‘——CÃøöï<ÿüó‘‘‘)))éééóçÏïСƒ¤ÃŠÕ¹Ø¡aDìØÚ( €- OŸ>í/ £½£TØ·oß¡C‡Ì×?ýôÓ?ü@ÏÈ&÷‘p0®Ò<4oÞÜ0Œ3gÎ8Ϊª*ó{ f$?G¬¥ðÐ"EìÐ0"vh%S - ººzõêÕûì¤I“390uêÔyóæuíÚUYS2#«¦ #FTTTxšç¯¿þú믿^žN—”ƒ‘ðáóbbb6nÜèr¹zöìR[[ëïïOÌøÝwß5 „ˆˆˆ“'OR2j)<´H;4ŒˆFI`lW_}õ­·Þz±Ïzn±µQð8Pžùùùýë_…©©©k×®5Û:VfLNN6_\p÷¸å:-Ɔp¸„ƒ‘C•6oÞ<ÏiBˆï¿ÿÞsM#ù9b-…‡©@ì0"vhe¡kéØì%š‚h8ô[†Q[[[]]-Û¢ÑÈä^@Ä #bçŠÁ)ÐSQQqòäÉààà-Z3’?3°oß¾—ø ín4¥O?ý´ËåZ»víÎ;kkk;vìxüøqËEÁÁÁ•••¾¾¾ž'×_ý®]»(%È>Ïò÷ÊxP_êè’±C>±c-h¶†úúúE‹¥¥¥›O¢¢¢¦M›–’’Ò°&¶µ‘ðÞQ&{¹ÑDÃ(A¾ß’æŒíÔ©S…•••j®9UiT_xè’r±C±# ­ëÏtHNNމ‰IMMÍÍÍÍËËËÍÍ]¾|yttôìÙ³ÉÉŸxûí·_â³·Ýv£ š‚^‡{eÔº¤Ø±# ´@[CxxxNNNlllÇ0`€Ëå¢aäp5< í¦ ]Íóä÷hùÅ„††––– <8))É<í¯  €’Q}á¡^ŠØ6‚CìH-ÐÖPWWÑèaddäéÓ§É9œè!##Cv›®v#aÈ71¹„ƒý2ê õRİbG[CbbbRRÒâÅ‹=“”ùùùÉÉÉC† !cä°wÔÃ}÷ݧx8ªÞHòûÕ™ÜGÂÁÈ&KjæˆÕꥈ v$¡qiÊZÐm ÇŸ0aÂçŸît:CBBª««kkk²²²"##i§3Õê„AS° ª´N:½úê«ûì¤I“,?^½Qñá‡zflG޹k×®¬¬¬—^zÉr‘F£–ÂC‹”6ˆÄŽŒ’@Ím%Ü·oŸyL¯^½âââ虀°­aµ_]Kó<‡=jŒª´1cÆlܸñbŸ6lئM›ìn4a2G¬¥ðÐ"EìØ×ˆØ‘ë¬PspªªªBCCé9Ìû w{Ð2uÂa†H‘O•€í@ìØ×À¥ÁئL™2tèÐqãÆ6p˜÷Ìš‚0&cäÕ%5&h)<4V;ˆ vÀPrÙ} ”––FÛÈ5kÖ¸ÝîF7oÞœ‘‘AÈ»Žå¡Þh¢%ÆÕK9ß~ûmòFÿ £¥ðÐXípøçŠØ¡aL5ð?8Žººº€€€†³²²–/_ž““CÈ¡¾]_‹”ƒ‘Ãr# bF»Á× YLEE…yHC‹-(™ìBlÞ¼¹iÓ¦ Ÿ”••íÛ·O†K—ÑMA¶æÜ¹sB•Õ’© #vA}©£EŠØ€*˜¢°†úúúE‹¥¥¥›O¢¢¢¦M›–’’âëëKÀÈdï¨Ãá¸ØoÏív[®Óbl¨Æœ¨}3fÌ5×\óÔSO5|¸sçέ[·>ýôÓd¤éééIIIþF¶lÙR^^~ï½÷Ò0zàð'IuIɱúRG‹±C̈ر¯Q¨G­aöìÙ6lHNNîÛ·¯yýÉîÝ»—,Yr×]w-^¼˜€‘É™lH–ŠzcC5ù·=´nÝúßÿþw·nÝ„uuuï¿ÿþ„ :4räÈ~øŒ”Õ®ªU˜Ì«/u´H;ÄŒTa;’@=j ááá999±±± 8p`À€.—‹†‘3ÆÏÏÏår…„„!JJJúôéSZZZYYÙ¦M›ºº:2R‡Ã‘ÝhÀÑ£GçÎ[SSCÃÈòKjL戵ꥈ vhÄŽ$PZƒÓé,**j´/ÅårÅÄÄÔÖÖÒ0z ºÏYQVV.U¡Ýèê¼/“¦ ¶mÛnÚ´©W¯^Bˆ;w0 ¦¦¦¨¨èÚk¯-++³\§KÊaWù*M0[R#Œ–ÂC½±CÈØ—`k0ïzY¼x±g’2???99ÙÏÏï“%ö2’ßç܃|ò0i 
º÷Þ{?¾jÕ*!IJeËV¯^ýÄOWTT¼÷Þ{–ëtI94ep¨ÒX-©>_P}á¡Eê%! ÞˆØ±ÑáØ‘ÀÖpüøñ &|þùçN§3$$¤ººº¶¶6!!!+++22’†‘ü>gÁcO~Þ—ISÐáÇ ßÿý={ö<ûì³]»v]¿~}çÎ-×é’zI](ÛH¾Jã°¤ÖPMuw‰úÂC‹±Cƈءd´{÷ÞÆÁƒ÷íÛg®ãõêÕ+..Ž’‘Ã>gƒ|ó¾Lp»ÝùùùmÚ´ £*å°+C•ÆaDÑPM»U_ê(–"vÈ;”ŒÖbïïÞ !Ü:ËaŸ3‡A>‡y_š‚˜\ÈIUcC5ùJ±cw8üIr06T“‹1€¸ÝîùóçGEEy~±QQQ ,p»ÝdŒ£G5jÔÁƒ=Oòòò† fîÏ¡a ,//oôðĉdŒBß‹@ÆØP-[¡Ë¨>´H'Ožüî»ïJúâ^bBÔÕÕÑ6º\.•:-F•••Tˆ2FÄ £±#‰&V¨™’’’²aÆ”””F¬ÕÕÕ’YÕÓÒÒ&L˜×hÎÚµkeè´“’’Î?lcÈ!dŒBˆššÅ3”êäQŸZ¤………•••2¾²÷].—â¿õFO{§²E<õFêï8UfDìH±#Ä1£Åè!,,¬áB¥ÉþýûÃÃÃÉMòóó333_{íµÌÌÌüü|©.õÆÒÒÒ„„!„ÓélÓ¦ÓéB$$$”––’1 ó¾ ÕTZ@Wì»Ã¡IjÍš5çñÍ›7gdd1"v€@ìÐ0J`kàÐ:ËÚƒ|4Ñ0jI±S^^^PPp¾€‘C¿errrLLLjjjnnn^^^nnîòåË£££gÏžMÆxÁ™¾ÌÌÌ~ýú‘1"vÈ;4ŒbG6ßÁì5p¸xÊ”)C‡7nœŒ/î%F„3ce$ “ 99ÜL6xðàñãÇ?üðÃ2¾¸—q¾ #b±c##b‡†Qß”àÐ:;hР´´4I_ÜKŒf894©O-RSøàÐ$%œ/ˆØ!cäb‡†QX¶Ú÷s€Ã=ÀŒ|î:&!'‡)|„›284Iñ¹s±CÀè±ck#ŸØ±­Ão`KïŠáp˜£";;ûƒÿMjjªÓé$cd‡)|Mš¤§óiƒØ¡aDìÐ0J+ÀУG›o¾¹}ûö)))T‚Ç®§ÓYTTÔhVÒårÅÄÄÔÖÖÂh£Ãá¸Ø¿·ÛMèe‡¼z)‡)|M&´›¤ÊÊÊ<— ¨A±QKá¡EŠØ¡a4AìØÝ( ­Ão">ðÀ:u"l4xìŠ=zô¨Q£.Wæåå 6Ì|s‚Ñ.FÁ`NTýy-RSøš2XAµIJKá¡EŠØ¡adÕØ‘Ààrá—Þö89 €YAûf2ý–î\áÐSÊ ÄŽÝˆFI4±~M¥®®.""¢ÑÃÈÈÈÓ§O“1FFFnß¾]eû Œ2p¹\ŠOhPoô år)ÅÒ¸¸¸–-[R5&&&&%%ßý8dÈ2ÆÂÂÂÊÊJI_ÜKŒ)))6lHIIiÔáY]]-©ÃS½‘ˆ»;4Œ²Ð=¶Cë,+84Ì6j™…å0ÙŒ“ZÀ•Á¡IŠ ˆF v® €ÁåB>.»wï>mÚ´E‹Éøâ^b4x¼µs0j¹7’ÃAº.ä¤Ýoé𜇞R& v( ÄŽÍ’Àü1Ç%3ƒÑFF-³°&›5No®Ò8ÌI¡IŠ ˆFÄ £$0ÂqÉoíŒZfa9L6ãBNp˜“"ß$ÅÄ #b‡†Q[Ú5kÎÿÞ¼ysFF#‡¸ä‡·vF-³°&›Õ9Tiæ¤L7I: -RÄ #b‡’Ñr0¶q¡KP233ûõëGÆÈ!.9Àá­ƒQË,,‡Éf\È)sRP_xh‘"vh;à8 ÃàOãp8²³³›6mÚðáÑ£GçÎ[SSCÞ““ã9¿ÞäÀ p¹\4Œ8~üø„ >ÿüs§ÓR]]][[›•• £]Œ&*/—Ò(¥mt:EEEn=q¹\111µµµ4ŒæLÐù7 øùùmܸ‘†qÊ”)C‡7nœŒ/î%Fõ…‡.©@ìØßˆØ¡a”ÀÖàp8|}}/ø)·ÛMÃÈ!.ù@û­€C•ÆaNjðàÁãÇøá‡e|q/1ª/lù÷)£Tid :ï&x €;ÄŒL ;’ÀØÊÊÊÂÃÃiMhÇåßh³²²–/_ž““CÀÈä­}öìÙ6lHNNîÛ·ohhhUUÕîÝ»—,Yr×]w-^¼˜†»€y7ónBSá¡«Ú¶±C&v$0ÿ‡ÃÌ8€Û€]àP¥aÞ ónÀ«@ìÐ0 ÄΟAýÁÓ$™ºàD(,,¬¬¬$lÜ´iÓ™3gÒÓÓ MjjjÜ‚’‘<‰‰‰III‡ò<ÉÏÏà† BÆhо}û7ÞxCåŠz#êêê"""=ŒŒŒ<}ú4£úRG—”6ˆ2pˆI €ÿÃafàp`À«ÀE Å v® €-ç°Ù>‡™q€ömçÎBøø(íâÑ"À°šwÓRx Ú ¬bÇZ0¶œÃF Ld/g̘1×\sÍSO=ÕðáÎ;·nÝúôÓO“‘’¿™ ƒö¼›–ÂC½±ìíØ‘…ÆýÇ”HNNމ‰IMMÍÍÍÍËËËÍÍ]¾|yttôìÙ³É9àv»çÏŸåù‰ŠŠZ°`Ûí&c\³fÍù_|óæÍ4ŒLlkÕªU^^žùúôéÓ™™™†a´H9Lá4(--MÒ÷#æÝh;dŒˆF±# €­¡´´4!!Aát:Û´iãt:… 
¥¥¥dŒâ’ÃDãg(¥¢Þ¨>´H9Lásón4Œˆ2F vh%k¬„öõ9àp0#ŸËÈ_Èž““Ûðá àr¹h=¾EÏœíZ¼x±ç›ŸŸŸœœìçç·qãFFÄ)b‡†±CÃ( ­Ãoj;vlÇŽÅÅÅ„é]1&ùùù™™™¯½öZfff~~>Œ¶3r8° ¦ð9ô[rh’ OL´*¥ˆFÄ £$0¶†S§N;Öáp8!ĸqãjjjˆ9Ä%+ÈOdÐ6ªß!¯EÊ¡cŸO¿%æÝìnT_xh‘"vhM;v7J`k˜={öˆ#Ž?{ôèÑ[o½õÑG%f$—III—øì½÷ÞKÀhð˜Èà`T¿C^‹”Ã>‡“ZXAxÞM}á¡EŠØ¡adáØ‘ÀÖбcÇcÇŽ†kFIIIûöí‰ÉÇe§NþsqZµjEÀh0˜È`bdí)|ý–.à0料ðÐ%5;ö7"vh%°5\uÕUæ 3 Ïž=Û¢E bFòqÙ¾}{ç%!`4Ld01zÐ2 Ëa²Y™‘C¿%‡K8Ì»©/œ˜‘C\r€üD£–YX“Í8©\æÝÔZ¤ˆFpˆI`l ÷ÝwŸÙöвeË   «¯¾º  €˜qIŒZfa9L6㤩^Rã0料ðÐ"EìP2ˆ›%°5¸Ýîß~ûÍ0Œ-[¶ìÙ³GA+¼z£ “¸$ ‡‰ F-³°&›Yu³+3rXRã0殮ðP/EìÐ0"vh%0øÃŽK>p˜È mÔ2 Ëa²™C7»z#‡%5ónL@ìÐ0"vh%°ÅTTT<òÈ#$âƒðd–YX“ͺÙÕù,©Ñžwó ¾ÔQ)EìÐ0"v(-`‹)** #iä—äÁ]Ç2tZŒZfa9L6sèfWoä°¤Æ õ¥ŽJ)b‡†±.Ã0 ¬£¸¸¸wïÞeeeôŒááá999±±± 8p`À€.—‹†‘<;wÎÈȸØgÇŒSRRbw£böìÙ6lHNNîÛ·ohhhUUÕîÝ»—,Yr×]w-^¼Ør£ÉÁƒ÷íÛwòäÉààà^½zÅÅÅÉsi”Ò6:΢¢¢-Z4|èr¹bbbjkkiÍ…¬Å‹{ò¸víÚ‹}vâĉ—X²‹ñ‹/¾Ø¹sçÅ>ûé§ŸZ«ÓbB¤¤¤lذ!%%¥ÑIxÕÕÕ’NÂSoäb±c£@ìü Ðm1'Ož|õÕWÿñÐ3ÖÔÔ<ðÀÙÙÙBÃ0Æ÷Æo8NJFP ‡~˘˜˜K·­ÖÔÔØÝ(˜]” ¾ÔÑ%% bG vìc”Ààr™3gN~~þ믿~óÍ7òÉ'S§N]ºtéÿ×ÞýÅ4y، ÚZ-4@D¬:eà²,ƈ\€Ë2X¢™bpqÞÊ’nêŒgV®ŒQ!!š.t¸£&N3$½ Ñf,¥Ê’Q!ÕÂ〤ð»h~ü ¿Aô¡íóœÓ÷ëb!ç!ùp1¿9ßó<ç™!dXÛ†4’d–– Ò“à^Ȳ# ÊŽf4À±1>>®(Ê| qâÒ¥KÿüóO‡Ãát:»»»ûúú>û쳸~º“øDH Ö¶ôgï)zL<ô „ e'Nh€ccË–-k×®=|øðÛƒçæÍ›GŽ‘#qÁ‚ÿþû¯¢(ÑvttttÉ’%/_¾ŒG–^‰kÛ/.JHüÄC¯P@ÉPvâ„86–,Yrûöí+V(ЇÛÛÛ«««ý~UUÕÇåHÌËË{ðàAVV–Óéüûï¿=úàÁƒ«W¯Æ#K¯D4“û^™ÄO<ô "wÙ‰àØHKK ƒ™™™Š¢<þ¼¤¤¤¯¯o``Àn·‡Ãa9¿þú몪ªíÛ·Ûl¶p8\PPðûï¿çççÇ#K¯D@¥¥¥¡Ph†_ˆÇN]BGâ'z…× Å†Õj ÅÅÅŠ¢<~üXUÕH$¢ªjüŽ,N|â… ¢Ë%/^´Ùl«W¯ž3gNœ²ôJ„ ªjccãtOwïÞ-M( „$9{/ñ½BãK’²'4À±±qãÆƒž;wNQ”–––7oÞ;v¬··wýúõÒ$N6Ÿ›6mŠS„Ö¬YóùçŸO÷tíÚµÒ„BH’KG?ñÐ+0¾$);qÂ'бÑÓÓSQQæÏŸßÞÞ~ÿþý£Gµ¶¶Ê‘8i``àÇ<}út\SôMà%ÉÙ{ºLü§ÄO<ô  7Vñc£ªªÊd28pàôéÓÏž=SeÛ¶m—.]²Ûí¿þú«‰sçþß‘i‘H$zBÕG}ôàÁƒxÄé’‡7À#ñ½BÈILl˜Íæžžž¬¬¬ m~cšIDAT`0¸hÑ¢§OŸÚíögÏž­Y³&Nî&>qŸ@Æ188h2™’!€¾t™xè8Û +>ŽÑÑÑè•t ,P%zc{ffæèè¨4‰Œc|||||\Q”D6¢º„0]&ÌvÄ÷ÇFQQÑùóç÷ïßßÜÜœ——wèСo¿ýöܹs%%%Ò$0Ž­[·®]»öðáÃoz<ž›7o9rD¦PÀøJKKC¡Ð ¿DOTtšx0ÛþS’”8áèØ¸råÊŽ;>üðÃÔÔÔ{÷îÕÖÖz<«ÕzõêÕO?ýTŽÄI¯^½:sæÌ÷ß×}ƒ[²dÉíÛ·W¬X¡(J8noo¯®®öûýUUU>”)0¾¥K—ž9sfº§»wïîïï=QÑiâ¡ãl0²$);qB3=òz½ëÖ­³ÛíOŸ>]¼xñÛÇ8IÀ ÒÒÒ‚Á`ôkÀçÏŸ—””ôõõ 
Øíöp8,S(`|[¶l¹råÊtO¿øâ‹k×®‰ž¥ËăÙðÿ%OÙ‰`¼«èÞ¿DÞ—øD@999×®]+..VÅãñ”—— =yòä“O>‰ßÁ0º„Ä­E ìÝ»w†§»ví’ QQ”­[·þüóÏS=ÏñãÇã§K" Š79Ì;wõêÕ¡P(,\¸0;;[ÖPºÓeâ!Э*Â`¼«ÄoÿcÃ!0H$R__ïp8ÌfsAAÙlv8'NœˆD"’…b>€¡PvÞ—Øï¯‘H‰ßþdžC`:ß}÷][[[]]]ii©Édìììlhhøê«¯Ün·L¡1DŒ÷øíl8þ“ÅbéèèX¾|ùÛƒ]]]åååÁ`P¦P@ÍÍÍ555sæÌy{ðÆ/_¾Ü¹s§‰ …²£Ÿ@ã=Ì;×n·geeIœ![­Ö)ƒ6›mddD²P@ûöí{óæÍ”AUUÏž=+M"C¡ìhÆ!XxW~¿ÿË/¿ôûý7oÞìèèøå—_–-[ÖØØ¸xñb9QTTTÔÔÔ¸ÝîÉ÷±>Ÿ¯®®®²²R²P@ׯ_OMM}{äÅ‹^¯W¦D†BÙцO ñ®ªªªL&ÓNŸ>ýìÙ3EQ¶mÛvéÒ%»Ýþ믿ʑˆ¢¿¿¿ººúÎ;ééé™™™ªª—••]¾|Ùf³É !%%eÊg“ÆÆÆäH`(”Íh€ñ®ÌfsOOOVVV0\´hÑÓ§Oívû³gÏÖ¬Y3ó5}%béîîöz½¯^½Z¸paqq±Óé”50¸”””p8Æ»^À»`ÁEQ233£ÿ•&‹ÓéL|ÿ©K(@LpÞUQQÑùóçEinnÎËË;tèßï?tèPII‰4‰€(jkk[[[“!B0Lð[‘Ä'0ÊŽf¼Æ»:|øðŽ;êêêRSSïÝ»W[[{êÔ)«ÕzõêUiQÄ_C¯K( ‹Åý! E7dggK–ÀP(;š±ïáÑ£G^¯wݺuv»}bbâéÓ§‹/ž;7ŽË(‰Oà}E"‘“'O655õööFGrss÷ïßïr¹¦;3F¸D†BÙÑŒFïaÙ²eË–-‹þœ’’’››+_" ]VaE_úbÎårµµµ¹\®ÒÒR“É488ØÙÙÙÐРªªÛí–#€¡Pv´›ÞAMMÍ OwîÜ)A" ±±±ãÇ¿½$”››[__?66&Y( ³ÙÜÝÝ=eð¯¿þ²X,Ò$0ÊŽf¼Æ;¹{÷®Çã™îé­[·$H¢Ë*¬>>>>ž ¡€šššTUu:999+W®nll”&€¡Pv4c0ˆgË–-k×®=|øðÛƒçæÍ›GŽ‘)Hww·×ë}õêÕÂ… ‹‹‹N§|‰ …²£ 0ˆgÉ’%·oß^±b…¢(áp¸½½½ººÚï÷WUU=|øP¦P€¢ñ¤¥¥ƒÁÌÌLEQž?^RRÒ××700`·ÛÃá°L¡€ñ­ZµjÆ yyy.—KÖD†BÙ™ ö€x¬Vk ˆþüøñcUU#‘ˆªªééé’…Æwíڵׯ_777KœÀP(;³Á`ÏÎ;ûûûÏ;§(Ê©S§øá‡ÞÞÞP(ôÛo¿É C4À žžžžŠŠŠ@ 0þüöööû÷ï=z´¨¨¨µµµ°°P¦P€¢!ù|>»Ýn6›åˆ`X(Š^E-w(Àìqˆ'‰Ô××;³Ù\PP`6›lj'"‘ˆd¡14Wï?ðÞ\.W[[›Ëå*--5™Lƒƒƒ ªªºÝn™BbˆO @<‹¥££cùòåovuu•——ƒA™B!477×ÔÔÌ™3çíÁ7n¼|ùrçÎr$0ÊŽf| â ‡ÃV«uÊ Íf‘,¾}ûÞ¼y3ePUÕ³gÏJ“ÀP(;šñ 4ˆ§¢¢¢¦¦ÆívO¾õù|uuu•••’…¢¸~ýzjjêÛ#/^¼ðz½2%0ÊŽ6| âéïﯮ®¾sçNzzzff¦ªªÃÃÃeee—/_¶Ùl2…BHII™òYऱ±19 eG3`Uww·×ëÞHT\\ìt:e  .%%%Ï›7OâD†BÙÑŒ`V˜‰H0ÊŽf‚â©­­mmmM†P@Á`0Á“ÂÄ'0ÊŽf‚â É Áb±D…BÑ ÙÙÙ’%0ÊŽf¼ñüñÇß|óM2„BˆD"õõõ‡Ãl6˜Íf‡ÃqâĉH$"M"C¡ìhÆ`˜.«°¢/ý1çr¹ÚÚÚ\.Wii©ÉdìììlhhPUÕívË‘ÀP(;šqˆ'‰œŸ¯®®.--íÊ•+r$0ÊŽf‚âÑeVš¥_ æšššTUu:999+W®nll”&€¡Pv4ã 0ˆG—UXi–~8éîîöz½Ñ#⊋‹N§|‰ …²£ 0ˆ§¿¿¿ººúÎ;ééé™™™ªª—••]¾|Ùf³É C4À *]Va%Xúb®¶¶vÓ¦MÛ·o—8€¡Pv4ãh•ÓéL|ÿ©K(`p@```@îD†BÙÑŒ7À ˜½{÷ž?~º§»víjii‘# ¶x ‚¹{÷®Çã™îé­[·¤ „ …¢²³³eM`(”÷Å`L~~þ‹/fø…¡¡!9BQD"‘“'O655õööFGrss÷ïßïr¹æÌ™#G"C¡ìhÆ`L H’P@.—«­­Íår•––šL¦ÁÁÁÎÎΆ†UUÝn·‰ …²£o€fÅb±tttL^‘ÕÕÕU^^ åH`(”Í>Ðû[8¶Z­Sm6ÛÈȈ4‰ …²£ 0À¬TTTÔÔÔøýþÉŸÏ·gÏžÊÊJi 
eG3`€YijjRUÕétfddäääddd¬\¹rxx¸±±QšD†BÙÑŒ=À1ÐÝÝíõz£·ƒ;Nù eG`@RàhíöîÝ;ÃÓ]»vIÀP(;³Á=ÀÚݽ{×ãñL÷ôÖ­[$0ÊÎlð 4€vùùù/^¼˜á†††DO`(”Ù $ö’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ 0 )Ð’ÂÿÆv ª$*™IEND®B`‚sleef-3.3.1/doc/html/osi_logo.png000066400000000000000000000046541333715643700166720ustar00rootroot00000000000000‰PNG  IHDR´ÎeZ5¦PLTESÿÿÿ=¦9ÖÙÖ=j9t–œšNk"­I» í´¶´Ú(sšn?O@ïŒÓÚ’¹d!j˜uûI¢§¥ÛK‰§µÛg^{Nk·½óœÖ%:º_]’‡,zí|=´ÙŠD± dÈ¢#ƒX¬„9S/‚ qуÐC)rvÑ•ÐC*òä¢k¡‡Tdå¢w¦^JÄF±Ô´+±Q,5-ŽÅ†•º\l‹¾WRëô ú$¥î¥Ø]}©¹/öy±It••š‹hVìE]A‘鸎#Fߦ;E&[Ç“¢Ø`l"Ž=ÐôÚ¼?_"w>U%HNŽååG¬¿S‘"È'þúþKÉ,g ð)^»ÀŽì^gѵ'aË9”wFÁ;ºJ ï=td¯–Ã,èÑj ­‘9›@Ÿ:=ßdbC¦uC§õPÁĆHk6jÝú™Ø­*LÐd+†zXEÂt´x‘^[U9B#Õ39"1hÑïTª¶í×¹™ÈY`¢Ò%Ú¹Zz(Có,mÆq(7L‚¨Ð,Zäsµ ýévè' o/þËaœŸ|å%J«q^¡oºðóTüQß…²VÐ ôܦ;颼íB·Ýws·Fl5,Ž8ÑG,zpб|0cÖwšƒ'g Á«²!ØÝÛôÑ]o¤íõóó—ܱL ëìšôU¼­p“žµÜ5#ö:ŠåN¬¤Öm<“L§Š™»Hw|ˆ¤Ó6­g®ÿ‘FÃÌõŸÙçØÝûÇÂü« bÐW-kú2ì>^¸×ZÊþ6}‡sï)–é¯ö@È.ÍÉ®(T“Ÿ6Š"ŽìuDë8Aÿ¶8îÐÐþŒ<ô)ƒ^˜ß¦4¦¶¼'“£j¬€x–aVÉ¥ÔDk±ˆ¥ÔzçNÍKyaÑPj¢³™Í(Š8ÆëÆ3%Þé÷@OYD3ýB&ŠlGZ¢™~}ºÌE9¿­aý8¶Ôƒh£ü(2ŽB¢½qŽ#€Z Àü Zßi¦f¾^ߎ"±µ¡·Q;€Ðùh Lóûh †ÃGÛA£IÍ×AÌÉDûdËõãØ2ˆšDŸ_J£9CÍŠ 6Š~[ÞË tE·ÅÜò:í£¹PÀDÑcC§shº}gËÜÆ‘Nk4ÕFŸŒ¢m½‘NçÑÌí0íB¿x :~ëº%aSqlÆ¢úm»ý9ƒæÆÒOx+ý?韟/¯Û…f$ÑË·‚ð¿w«íÛÛåásMIÀ–{mÞÉ«¿Ú·›ÞFq0àÍ2“½æ¾ï' úór &¹.Ëd{D ×"úòõwžÇNH½E“6;—U­ ¿üjcÇXV{êJC=ü„¦G±½Qô–l‘§Å´X„›±**žûcc¦çãX>Ÿ™c&mn~F“¶5g]øF=„HiíÑó{þLUÂ-Ñ‹V×Ãêš¶RyþhÜ?Ì(ªpS,Š…OÏÛÃÌÖ—i•„‹(oŸž·ÍÌ—Ê—IÃð…èZ=õëN¢Ò¸KIêi ÚuÏRì©7'z ˆ(êu-7¤k, ôËI4D¿ƒŠˆÚ )<XÐW§±qtQ‘µ8µsÆÕ‘.ðyÏŸZ|0œÓ-n*ìë”hôO$ŽqزTÄñÃν-[Šô» ?Ò¼¨©4>‘FrFKÃVÜ¡'^þ¸–VH쇕Í3N­(²¿ðHs]bÃØ"“Tä ÐðeZsa †+ îKBÎ1/é"ÿœæò¼]ñºz ’FÚöi……ÜÓ'ºÎänÌ0ÑÒP×§˜$UèP> Û…A£~~3Ÿ.÷\-!éêët~éÁÑ©4is¤›|Z&_ö*=H‚£9ÌÓ{·>Mê¨<ºv´:ÒiÉÁ8Ú óiixÎã+Y>ÝXÚM¨i‰‡1µÍáR34ÿØJ5O 8šÀŸ8ÃLtdUc'ß¶Ô<‚ãÔ>t}¢[ž8M´b‰Ù‚SÇì8ûO,¡"…Ä-NtÁ×QÍ•wƒÐÔÆFR,3šë¬ðÔãÆ§±ìïPÕèZ4\ly‡úHkÄÝ]JôYw‡ÆÒk|_d CÂ1þMHÝrfGøÔ:šMIV5Dšîy‘d:dË«ðªª9Wݘéaÿ–ûíþVb½ï¥ë϶ÄÓ¾ãÕí{Cަ²tC¤J§”æ?|Ëí…_EÐëÛ_»YX7½];”Käe½‘'Ú.@¥ñhU ½¹žFãÑÞIëšÂn`ÚK`³®ß˜ MäÑWìùÊÒ§%qŽV¥¹l†ŒÉWà?Њj™ SLEEF Documentation

SLEEF Documentation - Math library reference

Table of contents

Data types for PowerPC 64 architecture

Sleef_vector_float_2

Description

Sleef_vector_float_2 is a data type for storing two vector float values, which is defined in sleef.h as follows:

typedef struct {
  vector float x, y;
} Sleef_vector_float_2;

Sleef_vector_double_2

Description

Sleef_vector_double_2 is a data type for storing two vector double values, which is defined in sleef.h as follows:

typedef struct {
  vector double x, y;
} Sleef_vector_double_2;

Trigonometric Functions

Vectorized double precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_sind2_u10(vector double a);
vector double Sleef_sind2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u10 with the same accuracy specification.


Vectorized single precision sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_sinf4_u10(vector float a);
vector float Sleef_sinf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u10 with the same accuracy specification.


Vectorized double precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_sind2_u35(vector double a);
vector double Sleef_sind2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sin_u35 with the same accuracy specification.


Vectorized single precision sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_sinf4_u35(vector float a);
vector float Sleef_sinf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinf_u35 with the same accuracy specification.


Vectorized double precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_cosd2_u10(vector double a);
vector double Sleef_cosd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u10 with the same accuracy specification.


Vectorized single precision cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_cosf4_u10(vector float a);
vector float Sleef_cosf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u10 with the same accuracy specification.


Vectorized double precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_cosd2_u35(vector double a);
vector double Sleef_cosd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cos_u35 with the same accuracy specification.


Vectorized single precision cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_cosf4_u35(vector float a);
vector float Sleef_cosf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosf_u35 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_vector_double_2 Sleef_sincosd2_u10(vector double a);
Sleef_vector_double_2 Sleef_sincosd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u10 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_vector_float_2 Sleef_sincosf4_u10(vector float a);
Sleef_vector_float_2 Sleef_sincosf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u10 with the same accuracy specification.


Vectorized double precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_vector_double_2 Sleef_sincosd2_u35(vector double a);
Sleef_vector_double_2 Sleef_sincosd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincos_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_vector_float_2 Sleef_sincosf4_u35(vector float a);
Sleef_vector_float_2 Sleef_sincosf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincosf_u35 with the same accuracy specification.


Vectorized double precision sin(pi * a) function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_sinpid2_u05(vector double a);
vector double Sleef_sinpid2_u05vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpi_u05 with the same accuracy specification.


Vectorized single precision sin(pi * a) function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_sinpif4_u05(vector float a);
vector float Sleef_sinpif4_u05vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinpif_u05 with the same accuracy specification.


Vectorized double precision cos(pi * a) function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_cospid2_u05(vector double a);
vector double Sleef_cospid2_u05vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospi_u05 with the same accuracy specification.


Vectorized single precision cos(pi * a) function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_cospif4_u05(vector float a);
vector float Sleef_cospif4_u05vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cospif_u05 with the same accuracy specification.


Vectorized double precision combined sin(pi * a) and cos(pi * a) function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_vector_double_2 Sleef_sincospid2_u05(vector double a);
Sleef_vector_double_2 Sleef_sincospid2_u05vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u05 with the same accuracy specification.


Vectorized single precision combined sin(pi * a) and cos(pi * a) function with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_vector_float_2 Sleef_sincospif4_u05(vector float a);
Sleef_vector_float_2 Sleef_sincospif4_u05vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u05 with the same accuracy specification.


Vectorized double precision combined sin(pi * a) and cos(pi * a) function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_vector_double_2 Sleef_sincospid2_u35(vector double a);
Sleef_vector_double_2 Sleef_sincospid2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospi_u35 with the same accuracy specification.


Vectorized single precision combined sin(pi * a) and cos(pi * a) function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_vector_float_2 Sleef_sincospif4_u35(vector float a);
Sleef_vector_float_2 Sleef_sincospif4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sincospif_u35 with the same accuracy specification.


Vectorized double precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_tand2_u10(vector double a);
vector double Sleef_tand2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u10 with the same accuracy specification.


Vectorized single precision tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_tanf4_u10(vector float a);
vector float Sleef_tanf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u10 with the same accuracy specification.


Vectorized double precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_tand2_u35(vector double a);
vector double Sleef_tand2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tan_u35 with the same accuracy specification.


Vectorized single precision tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_tanf4_u35(vector float a);
vector float Sleef_tanf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanf_u35 with the same accuracy specification.

Power, exponential, and logarithmic functions

Vectorized double precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_powd2_u10(vector double a, vector double b);
vector double Sleef_powd2_u10vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_pow_u10 with the same accuracy specification.


Vectorized single precision power function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_powf4_u10(vector float a, vector float b);
vector float Sleef_powf4_u10vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_powf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_logd2_u10(vector double a);
vector double Sleef_logd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u10 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_logf4_u10(vector float a);
vector float Sleef_logf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_logd2_u35(vector double a);
vector double Sleef_logd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log_u35 with the same accuracy specification.


Vectorized single precision natural logarithmic function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_logf4_u35(vector float a);
vector float Sleef_logf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_logf_u35 with the same accuracy specification.


Vectorized double precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_log10d2_u10(vector double a);
vector double Sleef_log10d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10_u10 with the same accuracy specification.


Vectorized single precision base-10 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_log10f4_u10(vector float a);
vector float Sleef_log10f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log10f_u10 with the same accuracy specification.


Vectorized double precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_log2d2_u10(vector double a);
vector double Sleef_log2d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2_u10 with the same accuracy specification.


Vectorized single precision base-2 logarithmic function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_log2f4_u10(vector float a);
vector float Sleef_log2f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log2f_u10 with the same accuracy specification.


Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_log1pd2_u10(vector double a);
vector double Sleef_log1pd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1p_u10 with the same accuracy specification.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_log1pf4_u10(vector float a);
vector float Sleef_log1pf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_log1pf_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_expd2_u10(vector double a);
vector double Sleef_expd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_expf4_u10(vector float a);
vector float Sleef_expf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expf_u10 with the same accuracy specification.


Vectorized double precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_exp2d2_u10(vector double a);
vector double Sleef_exp2d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2_u10 with the same accuracy specification.


Vectorized single precision base-2 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_exp2f4_u10(vector float a);
vector float Sleef_exp2f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp2f_u10 with the same accuracy specification.


Vectorized double precision base-10 exponential function with 1.09 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_exp10d2_u10(vector double a);
vector double Sleef_exp10d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10_u10 with the same accuracy specification.


Vectorized single precision base-10 exponential function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_exp10f4_u10(vector float a);
vector float Sleef_exp10f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_exp10f_u10 with the same accuracy specification.


Vectorized double precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_expm1d2_u10(vector double a);
vector double Sleef_expm1d2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1_u10 with the same accuracy specification.


Vectorized single precision base-e exponential function minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_expm1f4_u10(vector float a);
vector float Sleef_expm1f4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expm1f_u10 with the same accuracy specification.


Vectorized double precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_sqrtd2(vector double a);
vector double Sleef_sqrtd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u05 with the same accuracy specification.


Vectorized single precision square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_sqrtf4(vector float a);
vector float Sleef_sqrtf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u05 with the same accuracy specification.


Vectorized double precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_sqrtd2_u35(vector double a);
vector double Sleef_sqrtd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrt_u35 with the same accuracy specification.


Vectorized single precision square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_sqrtf4_u35(vector float a);
vector float Sleef_sqrtf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sqrtf_u35 with the same accuracy specification.


Vectorized double precision cube root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_cbrtd2_u10(vector double a);
vector double Sleef_cbrtd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u10 with the same accuracy specification.


Vectorized single precision cube root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_cbrtf4_u10(vector float a);
vector float Sleef_cbrtf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u10 with the same accuracy specification.


Vectorized double precision cube root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_cbrtd2_u35(vector double a);
vector double Sleef_cbrtd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrt_u35 with the same accuracy specification.


Vectorized single precision cube root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_cbrtf4_u35(vector float a);
vector float Sleef_cbrtf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cbrtf_u35 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_hypotd2_u05(vector double a, vector double b);
vector double Sleef_hypotd2_u05vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u05 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 0.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_hypotf4_u05(vector float a, vector float b);
vector float Sleef_hypotf4_u05vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u05 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_hypotd2_u35(vector double a, vector double b);
vector double Sleef_hypotd2_u35vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypot_u35 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_hypotf4_u35(vector float a, vector float b);
vector float Sleef_hypotf4_u35vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_hypotf_u35 with the same accuracy specification.

Inverse Trigonometric Functions

Vectorized double precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_asind2_u10(vector double a);
vector double Sleef_asind2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u10 with the same accuracy specification.


Vectorized single precision arc sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_asinf4_u10(vector float a);
vector float Sleef_asinf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u10 with the same accuracy specification.


Vectorized double precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_asind2_u35(vector double a);
vector double Sleef_asind2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asin_u35 with the same accuracy specification.


Vectorized single precision arc sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_asinf4_u35(vector float a);
vector float Sleef_asinf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinf_u35 with the same accuracy specification.


Vectorized double precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_acosd2_u10(vector double a);
vector double Sleef_acosd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u10 with the same accuracy specification.


Vectorized single precision arc cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_acosf4_u10(vector float a);
vector float Sleef_acosf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u10 with the same accuracy specification.


Vectorized double precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_acosd2_u35(vector double a);
vector double Sleef_acosd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acos_u35 with the same accuracy specification.


Vectorized single precision arc cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_acosf4_u35(vector float a);
vector float Sleef_acosf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_atand2_u10(vector double a);
vector double Sleef_atand2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u10 with the same accuracy specification.


Vectorized single precision arc tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_atanf4_u10(vector float a);
vector float Sleef_atanf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u10 with the same accuracy specification.


Vectorized double precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_atand2_u35(vector double a);
vector double Sleef_atand2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan_u35 with the same accuracy specification.


Vectorized single precision arc tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_atanf4_u35(vector float a);
vector float Sleef_atanf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanf_u35 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_atan2d2_u10(vector double a, vector double b);
vector double Sleef_atan2d2_u10vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u10 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_atan2f4_u10(vector float a, vector float b);
vector float Sleef_atan2f4_u10vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u10 with the same accuracy specification.


Vectorized double precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_atan2d2_u35(vector double a, vector double b);
vector double Sleef_atan2d2_u35vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2_u35 with the same accuracy specification.


Vectorized single precision arc tangent function of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_atan2f4_u35(vector float a, vector float b);
vector float Sleef_atan2f4_u35vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atan2f_u35 with the same accuracy specification.

Hyperbolic function and inverse hyperbolic function

Vectorized double precision hyperbolic sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_sinhd2_u10(vector double a);
vector double Sleef_sinhd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic sine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_sinhf4_u10(vector float a);
vector float Sleef_sinhf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_sinhd2_u35(vector double a);
vector double Sleef_sinhd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic sine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_sinhf4_u35(vector float a);
vector float Sleef_sinhf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_sinhf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_coshd2_u10(vector double a);
vector double Sleef_coshd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_coshf4_u10(vector float a);
vector float Sleef_coshf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_coshd2_u35(vector double a);
vector double Sleef_coshd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_cosh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic cosine function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_coshf4_u35(vector float a);
vector float Sleef_coshf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_coshf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_tanhd2_u10(vector double a);
vector double Sleef_tanhd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_tanhf4_u10(vector float a);
vector float Sleef_tanhf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector double Sleef_tanhd2_u35(vector double a);
vector double Sleef_tanhd2_u35vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic tangent function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

vector float Sleef_tanhf4_u35(vector float a);
vector float Sleef_tanhf4_u35vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tanhf_u35 with the same accuracy specification.


Vectorized double precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

vector double Sleef_asinhd2_u10(vector double a);
vector double Sleef_asinhd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic sine function

Synopsis

#include <sleef.h>

vector float Sleef_asinhf4_u10(vector float a);
vector float Sleef_asinhf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_asinhf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

vector double Sleef_acoshd2_u10(vector double a);
vector double Sleef_acoshd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acosh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic cosine function

Synopsis

#include <sleef.h>

vector float Sleef_acoshf4_u10(vector float a);
vector float Sleef_acoshf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_acoshf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

vector double Sleef_atanhd2_u10(vector double a);
vector double Sleef_atanhd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic tangent function

Synopsis

#include <sleef.h>

vector float Sleef_atanhf4_u10(vector float a);
vector float Sleef_atanhf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_atanhf_u10 with the same accuracy specification.

Error and gamma function

Vectorized double precision error function

Synopsis

#include <sleef.h>

vector double Sleef_erfd2_u10(vector double a);
vector double Sleef_erfd2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erf_u10 with the same accuracy specification.


Vectorized single precision error function

Synopsis

#include <sleef.h>

vector float Sleef_erff4_u10(vector float a);
vector float Sleef_erff4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erff_u10 with the same accuracy specification.


Vectorized double precision complementary error function

Synopsis

#include <sleef.h>

vector double Sleef_erfcd2_u15(vector double a);
vector double Sleef_erfcd2_u15vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfc_u15 with the same accuracy specification.


Vectorized single precision complementary error function

Synopsis

#include <sleef.h>

vector float Sleef_erfcf4_u15(vector float a);
vector float Sleef_erfcf4_u15vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_erfcf_u15 with the same accuracy specification.


Vectorized double precision gamma function

Synopsis

#include <sleef.h>

vector double Sleef_tgammad2_u10(vector double a);
vector double Sleef_tgammad2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgamma_u10 with the same accuracy specification.


Vectorized single precision gamma function

Synopsis

#include <sleef.h>

vector float Sleef_tgammaf4_u10(vector float a);
vector float Sleef_tgammaf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_tgammaf_u10 with the same accuracy specification.


Vectorized double precision log gamma function

Synopsis

#include <sleef.h>

vector double Sleef_lgammad2_u10(vector double a);
vector double Sleef_lgammad2_u10vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgamma_u10 with the same accuracy specification.


Vectorized single precision log gamma function

Synopsis

#include <sleef.h>

vector float Sleef_lgammaf4_u10(vector float a);
vector float Sleef_lgammaf4_u10vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_lgammaf_u10 with the same accuracy specification.

Nearest integer function

Vectorized double precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

vector double Sleef_truncd2(vector double a);
vector double Sleef_truncd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_trunc with the same accuracy specification.


Vectorized single precision function for rounding to integer towards zero

Synopsis

#include <sleef.h>

vector float Sleef_truncf4(vector float a);
vector float Sleef_truncf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_truncf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

vector double Sleef_floord2(vector double a);
vector double Sleef_floord2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floor with the same accuracy specification.


Vectorized single precision function for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

vector float Sleef_floorf4(vector float a);
vector float Sleef_floorf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_floorf with the same accuracy specification.


Vectorized double precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

vector double Sleef_ceild2(vector double a);
vector double Sleef_ceild2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceil with the same accuracy specification.


Vectorized single precision function for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

vector float Sleef_ceilf4(vector float a);
vector float Sleef_ceilf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ceilf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

vector double Sleef_roundd2(vector double a);
vector double Sleef_roundd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_round with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

vector float Sleef_roundf4(vector float a);
vector float Sleef_roundf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_roundf with the same accuracy specification.


Vectorized double precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

vector double Sleef_rintd2(vector double a);
vector double Sleef_rintd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rint with the same accuracy specification.


Vectorized single precision function for rounding to nearest integer

Synopsis

#include <sleef.h>

vector float Sleef_rintf4(vector float a);
vector float Sleef_rintf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_rintf with the same accuracy specification.

Other function

Vectorized double precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

vector double Sleef_fmad2(vector double a, vector double b, vector double c);
vector double Sleef_fmad2_vsx(vector double a, vector double b, vector double c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fma with the same accuracy specification.


Vectorized single precision function for fused multiply-accumulation

Synopsis

#include <sleef.h>

vector float Sleef_fmaf4(vector float a, vector float b, vector float c);
vector float Sleef_fmaf4_vsx(vector float a, vector float b, vector float c);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaf with the same accuracy specification.


Vectorized double precision FP remainder

Synopsis

#include <sleef.h>

vector double Sleef_fmodd2(vector double a, vector double b);
vector double Sleef_fmodd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmod with the same accuracy specification.


Vectorized single precision FP remainder

Synopsis

#include <sleef.h>

vector float Sleef_fmodf4(vector float a, vector float b);
vector float Sleef_fmodf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmodf with the same accuracy specification.


Vectorized double precision function for multiplying by integral power of 2

Synopsis

#include <sleef.h>

vector double Sleef_ldexpd2(vector double a, vector int b);
vector double Sleef_ldexpd2_vsx(vector double a, vector int b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ldexp with the same accuracy specification.


Vectorized double precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

vector double Sleef_frfrexpd2(vector double a);
vector double Sleef_frfrexpd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexp with the same accuracy specification.


Vectorized single precision function for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

vector float Sleef_frfrexpf4(vector float a);
vector float Sleef_frfrexpf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_frfrexpf with the same accuracy specification.


Vectorized double precision function for obtaining integral component of an FP number

Synopsis

#include <sleef.h>

vector int Sleef_expfrexpd2(vector double a);
vector int Sleef_expfrexpd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_expfrexp with the same accuracy specification.


Vectorized double precision function for getting integer exponent

Synopsis

#include <sleef.h>

vector int Sleef_ilogbd2(vector double a);
vector int Sleef_ilogbd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_ilogb with the same accuracy specification.


Vectorized double precision function for decomposing a value into signed integral and fractional parts

Synopsis

#include <sleef.h>

Sleef_vector_double_2 Sleef_modfd2(vector double a);
Sleef_vector_double_2 Sleef_modfd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modf with the same accuracy specification.


Vectorized single precision function for decomposing a value into signed integral and fractional parts

Synopsis

#include <sleef.h>

Sleef_vector_float_2 Sleef_modff4(vector float a);
Sleef_vector_float_2 Sleef_modff4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_modff with the same accuracy specification.


Vectorized double precision function for calculating the absolute value

Synopsis

#include <sleef.h>

vector double Sleef_fabsd2(vector double a);
vector double Sleef_fabsd2_vsx(vector double a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabs with the same accuracy specification.


Vectorized single precision function for calculating the absolute value

Synopsis

#include <sleef.h>

vector float Sleef_fabsf4(vector float a);
vector float Sleef_fabsf4_vsx(vector float a);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fabsf with the same accuracy specification.


Vectorized double precision function for copying signs

Synopsis

#include <sleef.h>

vector double Sleef_copysignd2(vector double a, vector double b);
vector double Sleef_copysignd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysign with the same accuracy specification.


Vectorized single precision function for copying signs

Synopsis

#include <sleef.h>

vector float Sleef_copysignf4(vector float a, vector float b);
vector float Sleef_copysignf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_copysignf with the same accuracy specification.


Vectorized double precision function for determining maximum of two values

Synopsis

#include <sleef.h>

vector double Sleef_fmaxd2(vector double a, vector double b);
vector double Sleef_fmaxd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmax with the same accuracy specification.


Vectorized single precision function for determining maximum of two values

Synopsis

#include <sleef.h>

vector float Sleef_fmaxf4(vector float a, vector float b);
vector float Sleef_fmaxf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmaxf with the same accuracy specification.


Vectorized double precision function for determining minimum of two values

Synopsis

#include <sleef.h>

vector double Sleef_fmind2(vector double a, vector double b);
vector double Sleef_fmind2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fmin with the same accuracy specification.


Vectorized single precision function for determining minimum of two values

Synopsis

#include <sleef.h>

vector float Sleef_fminf4(vector float a, vector float b);
vector float Sleef_fminf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fminf with the same accuracy specification.


Vectorized double precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

vector double Sleef_fdimd2(vector double a, vector double b);
vector double Sleef_fdimd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdim with the same accuracy specification.


Vectorized single precision function to calculate positive difference of two values

Synopsis

#include <sleef.h>

vector float Sleef_fdimf4(vector float a, vector float b);
vector float Sleef_fdimf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_fdimf with the same accuracy specification.


Vectorized double precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

vector double Sleef_nextafterd2(vector double a, vector double b);
vector double Sleef_nextafterd2_vsx(vector double a, vector double b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafter with the same accuracy specification.


Vectorized single precision function for obtaining the next representable FP value

Synopsis

#include <sleef.h>

vector float Sleef_nextafterf4(vector float a, vector float b);
vector float Sleef_nextafterf4_vsx(vector float a, vector float b);

Link with -lsleef.

Description

This is the vectorized function of Sleef_nextafterf with the same accuracy specification.

sleef-3.3.1/doc/html/purec.xhtml000066400000000000000000002237301333715643700165440ustar00rootroot00000000000000 SLEEF Documentation

SLEEF Documentation - Math library reference

Table of contents

Data types

Sleef_double2

Description

Sleef_double2 is a generic data type for storing two double-precision floating point values, which is defined in <sleef.h> as follows:

typedef struct {
      double x, y;
} Sleef_double2;

Sleef_float2

Description

Sleef_float2 is a generic data type for storing two single-precision floating point values, which is defined in <sleef.h> as follows:

typedef struct {
      float x, y;
} Sleef_float2;

Sleef_longdouble2

Description

Sleef_longdouble2 is a generic data type for storing two extended-precision (80-bit) floating point values, which is defined in <sleef.h> as follows:

typedef struct {
      long double x, y;
} Sleef_longdouble2;

Trigonometric Functions

Sleef_sin_u10, Sleef_sinf_u10 - sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sin_u10(double a);
float Sleef_sinf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the sine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_sin_u35, Sleef_sinf_u35 - sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sin_u35(double a);
float Sleef_sinf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the sine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cos_u10, Sleef_cosf_u10 - cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cos_u10(double a);
float Sleef_cosf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the cosine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cos_u35, Sleef_cosf_u35 - cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cos_u35(double a);
float Sleef_cosf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the cosine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_sincos_u10, Sleef_sincosf_u10 - evaluate sine and cosine functions simultaneously with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincos_u10(double a);
Sleef_float2 Sleef_sincosf_u10(float a);

Link with -lsleef.

Description

Evaluates the sine and cosine functions of a value in a at a time, and stores the two values in x and y elements in the returned value, respectively. The error bound of the returned values is 1.0 ULP. If a is a NaN or infinity, a NaN is returned.


Sleef_sincos_u35, Sleef_sincosf_u35 - evaluate sine and cosine functions simultaneously with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincos_u35(double a);
Sleef_float2 Sleef_sincosf_u35(float a);

Link with -lsleef.

Description

Evaluates the sine and cosine functions of a value in a at a time, and stores the two values in x and y elements in the returned value, respectively. The error bound of the returned values is 3.5 ULP. If a is a NaN or infinity, a NaN is returned.


Sleef_sincospi_u05, Sleef_sincospif_u05, Sleef_sincospil_u05 - evaluate sin( πa ) and cos( πa ) for given a simultaneously with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincospi_u05(double a);
Sleef_float2 Sleef_sincospif_u05(float a);
Sleef_longdouble2 Sleef_sincospil_u05(long double a);

Link with -lsleef.

Description

Evaluates the sine and cosine functions of πa at a time, and stores the two values in x and y elements in the returned value, respectively. The error bounds of the returned values are max(0.506 ULP, DBL_MIN) if a is in [-1e+9, 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if a is in [-1e+7, 1e+7] for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. If a is a NaN or infinity, a NaN is returned.


Sleef_sincospi_u35, Sleef_sincospif_u35, Sleef_sincospil_u35 - evaluate sin( πa ) and cos( πa ) for given a simultaneously with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_sincospi_u35(double a);
Sleef_float2 Sleef_sincospif_u35(float a);
Sleef_longdouble2 Sleef_sincospil_u35(long double a);

Link with -lsleef.

Description

Evaluates the sine and cosine functions of πa at a time, and stores the two values in x and y elements in the returned value, respectively. The error bound of the returned values is 3.5 ULP if a is in [-1e+9, 1e+9] for the double-precision function or [-1e+7, 1e+7] for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. If a is a NaN or infinity, a NaN is returned.


Sleef_sinpi_u05, Sleef_sinpif_u05 - evaluate sin( πa ) for given a with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sinpi_u05(double a);
float Sleef_sinpif_u05(float a);

Link with -lsleef.

Description

These functions evaluate the sine function of πa. The error bounds of the returned values are max(0.506 ULP, DBL_MIN) if a is in [-1e+9, 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if a is in [-1e+7, 1e+7] for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. If a is a NaN or infinity, a NaN is returned.


Sleef_cospi_u05, Sleef_cospif_u05 - evaluate cos( πa ) for given a with 0.506 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cospi_u05(double a);
float Sleef_cospif_u05(float a);

Link with -lsleef.

Description

These functions evaluate the cosine function of πa. The error bounds of the returned values are max(0.506 ULP, DBL_MIN) if a is in [-1e+9, 1e+9] for the double-precision function, or max(0.506 ULP, FLT_MIN) if a is in [-1e+7, 1e+7] for the single-precision function. If a is a finite value out of this range, an arbitrary value within [-1, 1] is returned. If a is a NaN or infinity, a NaN is returned.


Sleef_tan_u10, Sleef_tanf_u10 - tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tan_u10(double a);
float Sleef_tanf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the tangent function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_tan_u35, Sleef_tanf_u35 - tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tan_u35(double a);
float Sleef_tanf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the tangent function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.

Power, exponential, and logarithmic functions

Sleef_pow_u10, Sleef_powf_u10 - power functions

Synopsis

#include <sleef.h>

double Sleef_pow_u10(double x, double y);
float Sleef_powf_u10(float x, float y);

Link with -lsleef.

Description

These functions return the value of x raised to the power of y. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log_u10, Sleef_logf_u10 - natural logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log_u10(double a);
float Sleef_logf_u10(float a);

Link with -lsleef.

Description

These functions return the natural logarithm of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log_u35, Sleef_logf_u35 - natural logarithmic functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_log_u35(double a);
float Sleef_logf_u35(float a);

Link with -lsleef.

Description

These functions return the natural logarithm of a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log10_u10, Sleef_log10f_u10 - base-10 logarithmic functions

Synopsis

#include <sleef.h>

double Sleef_log10_u10(double a);
float Sleef_log10f_u10(float a);

Link with -lsleef.

Description

These functions return the base-10 logarithm of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log2_u10, Sleef_log2f_u10 - base-2 logarithmic functions

Synopsis

#include <sleef.h>

double Sleef_log2_u10(double a);
float Sleef_log2f_u10(float a);

Link with -lsleef.

Description

These functions return the base-2 logarithm of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_log1p_u10, Sleef_log1pf_u10 - logarithm of one plus argument

Synopsis

#include <sleef.h>

double Sleef_log1p_u10(double a);
float Sleef_log1pf_u10(float a);

Link with -lsleef.

Description

These functions return the natural logarithm of (1+a). The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_exp_u10, Sleef_expf_u10 - base-e exponential functions

Synopsis

#include <sleef.h>

double Sleef_exp_u10(double a);
float Sleef_expf_u10(float a);

Link with -lsleef.

Description

These functions return the value of e raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_exp2_u10, Sleef_exp2f_u10 - base-2 exponential functions

Synopsis

#include <sleef.h>

double Sleef_exp2_u10(double a);
float Sleef_exp2f_u10(float a);

Link with -lsleef.

Description

These functions return 2 raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_exp10_u10, Sleef_exp10f_u10 - base-10 exponential functions

Synopsis

#include <sleef.h>

double Sleef_exp10_u10(double a);
float Sleef_exp10f_u10(float a);

Link with -lsleef.

Description

These functions return 10 raised to a. The error bound of the returned value is 1.09 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_expm1_u10, Sleef_expm1f_u10 - base-e exponential functions minus 1

Synopsis

#include <sleef.h>

double Sleef_expm1_u10(double a);
float Sleef_expm1f_u10(float a);

Link with -lsleef.

Description

These functions return the value one less than e raised to a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_sqrt_u05, Sleef_sqrtf_u05 - square root function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_sqrt_u05(double x);
float Sleef_sqrtf_u05(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of sqrt and sqrtf functions. The error bound of the returned value is 0.5001 ULP. These functions do not set errno nor raise an exception.


Sleef_sqrtf_u35 - square root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

float Sleef_sqrtf_u35(float x);

Link with -lsleef.

Description

This function returns the value as specified in the C99 specification of the sqrtf function. The error bound of the returned value is 3.5 ULP. This function does not set errno nor raise an exception.


Sleef_cbrt_u10, Sleef_cbrtf_u10 - cube root function with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cbrt_u10(double a);
float Sleef_cbrtf_u10(float a);

Link with -lsleef.

Description

These functions return the real cube root of a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cbrt_u35, Sleef_cbrtf_u35 - cube root function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_cbrt_u35(double a);
float Sleef_cbrtf_u35(float a);

Link with -lsleef.

Description

These functions return the real cube root of a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_hypot_u05, Sleef_hypotf_u05 - 2D Euclidean distance function with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_hypot_u05(double x, double y);
float Sleef_hypotf_u05(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of hypot and hypotf functions. The error bound of the returned value is 0.5001 ULP. These functions do not set errno nor raise an exception.


Sleef_hypot_u35, Sleef_hypotf_u35 - 2D Euclidean distance function with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_hypot_u35(double x, double y);
float Sleef_hypotf_u35(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of hypot and hypotf functions. The error bound of the returned value is 3.5 ULP. These functions do not set errno nor raise an exception.

Inverse Trigonometric Functions

Sleef_asin_u10, Sleef_asinf_u10 - arc sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_asin_u10(double a);
float Sleef_asinf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the arc sine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_asin_u35, Sleef_asinf_u35 - arc sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_asin_u35(double a);
float Sleef_asinf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the arc sine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_acos_u10, Sleef_acosf_u10 - arc cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_acos_u10(double a);
float Sleef_acosf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the arc cosine function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_acos_u35, Sleef_acosf_u35 - arc cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_acos_u35(double a);
float Sleef_acosf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the arc cosine function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atan_u10, Sleef_atanf_u10 - arc tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan_u10(double a);
float Sleef_atanf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the arc tangent function of a value in a. The error bound of the returned value is 1.0 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atan_u35, Sleef_atanf_u35 - arc tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan_u35(double a);
float Sleef_atanf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the arc tangent function of a value in a. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atan2_u10, Sleef_atan2f_u10 - arc tangent functions of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan2_u10(double y, double x);
float Sleef_atan2f_u10(float y, float x);

Link with -lsleef.

Description

These functions evaluate the arc tangent function of (y / x). The quadrant of the result is determined according to the signs of x and y. The error bounds of the returned values are max(1.0 ULP, DBL_MIN) and max(1.0 ULP, FLT_MIN), respectively. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atan2_u35, Sleef_atan2f_u35 - arc tangent functions of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_atan2_u35(double y, double x);
float Sleef_atan2f_u35(float y, float x);

Link with -lsleef.

Description

These functions evaluate the arc tangent function of (y / x). The quadrant of the result is determined according to the signs of x and y. The error bound of the returned value is 3.5 ULP. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.

Hyperbolic functions and inverse hyperbolic functions

Sleef_sinh_u10, Sleef_sinhf_u10 - hyperbolic sine functions

Synopsis

#include <sleef.h>

double Sleef_sinh_u10(double a);
float Sleef_sinhf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic sine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-709, 709] for the double-precision function or [-88.5, 88.5] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_sinh_u35, Sleef_sinhf_u35 - hyperbolic sine functions

Synopsis

#include <sleef.h>

double Sleef_sinh_u35(double a);
float Sleef_sinhf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic sine function of a value in a. The error bound of the returned value is 3.5 ULP if a is in [-709, 709] for the double-precision function or [-88, 88] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 3.5 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cosh_u10, Sleef_coshf_u10 - hyperbolic cosine functions

Synopsis

#include <sleef.h>

double Sleef_cosh_u10(double a);
float Sleef_coshf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic cosine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-709, 709] for the double-precision function or [-88.5, 88.5] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_cosh_u35, Sleef_coshf_u35 - hyperbolic cosine functions

Synopsis

#include <sleef.h>

double Sleef_cosh_u35(double a);
float Sleef_coshf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic cosine function of a value in a. The error bound of the returned value is 3.5 ULP if a is in [-709, 709] for the double-precision function or [-88, 88] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 3.5 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_tanh_u10, Sleef_tanhf_u10 - hyperbolic tangent functions

Synopsis

#include <sleef.h>

double Sleef_tanh_u10(double a);
float Sleef_tanhf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic tangent function of a value in a. The error bound of the returned value is 1.0 ULP for the double-precision function or 1.0001 ULP for the single-precision function. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_tanh_u35, Sleef_tanhf_u35 - hyperbolic tangent functions

Synopsis

#include <sleef.h>

double Sleef_tanh_u35(double a);
float Sleef_tanhf_u35(float a);

Link with -lsleef.

Description

These functions evaluate the hyperbolic tangent function of a value in a. The error bound of the returned value is 3.5 ULP for both the double-precision and the single-precision functions. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_asinh_u10, Sleef_asinhf_u10 - inverse hyperbolic sine functions

Synopsis

#include <sleef.h>

double Sleef_asinh_u10(double a);
float Sleef_asinhf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the inverse hyperbolic sine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-1.34e+154, 1.34e+154] for the double-precision function or 1.001 ULP if a is in [-1.84e+19, 1.84e+19] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_acosh_u10, Sleef_acoshf_u10 - inverse hyperbolic cosine functions

Synopsis

#include <sleef.h>

double Sleef_acosh_u10(double a);
float Sleef_acoshf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the inverse hyperbolic cosine function of a value in a. The error bound of the returned value is 1.0 ULP if a is in [-1.34e+154, 1.34e+154] for the double-precision function or 1.001 ULP if a is in [-1.84e+19, 1.84e+19] for the single-precision function. If a is a finite value out of this range, infinity with a correct sign or a correct value with 1.0 ULP error bound is returned. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_atanh_u10, Sleef_atanhf_u10 - inverse hyperbolic tangent functions

Synopsis

#include <sleef.h>

double Sleef_atanh_u10(double a);
float Sleef_atanhf_u10(float a);

Link with -lsleef.

Description

These functions evaluate the inverse hyperbolic tangent function of a value in a. The error bound of the returned value is 1.0 ULP for the double-precision function or 1.0001 ULP for the single-precision function. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.

Error and gamma functions

Sleef_erf_u10, Sleef_erff_u10 - error functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_erf_u10(double x);
float Sleef_erff_u10(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of erf and erff functions. The error bound of the returned value is 1.0 ULP. These functions do not set errno nor raise an exception.


Sleef_erfc_u15, Sleef_erfcf_u15 - complementary error functions

Synopsis

#include <sleef.h>

double Sleef_erfc_u15(double x);
float Sleef_erfcf_u15(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of erfc and erfcf functions. The error bound of the returned value for the DP function is max(1.5 ULP, DBL_MIN) if the argument is less than 26.2, and max(2.5 ULP, DBL_MIN) otherwise. For the SP function, the error bound is max(1.5 ULP, FLT_MIN). These functions do not set errno nor raise an exception.


Sleef_tgamma_u10, Sleef_tgammaf_u10 - gamma functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_tgamma_u10(double x);
float Sleef_tgammaf_u10(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of tgamma and tgammaf functions. The error bound of the returned value is 1.0 ULP. These functions do not set errno nor raise an exception.


Sleef_lgamma_u10, Sleef_lgammaf_u10 - log gamma functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

double Sleef_lgamma_u10(double x);
float Sleef_lgammaf_u10(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of lgamma and lgammaf functions. The error bound of the returned value is 1.0 ULP if the argument is positive. If the argument is larger than 2e+305 for the DP function and 4e+36 for the SP function, it may return infinity instead of the correct value. The error bound is max(1 ULP, 1e-15) for the DP function and max(1 ULP, 1e-8) for the SP function, if the argument is negative. These functions do not set errno nor raise an exception.

Nearest integer functions

Sleef_trunc, Sleef_truncf - round to integer towards zero

Synopsis

#include <sleef.h>

double Sleef_trunc(double x);
float Sleef_truncf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of trunc and truncf functions. These functions do not set errno nor raise an exception.


Sleef_floor, Sleef_floorf - round to integer towards minus infinity

Synopsis

#include <sleef.h>

double Sleef_floor(double x);
float Sleef_floorf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of floor and floorf functions. These functions do not set errno nor raise an exception.


Sleef_ceil, Sleef_ceilf - round to integer towards plus infinity

Synopsis

#include <sleef.h>

double Sleef_ceil(double x);
float Sleef_ceilf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of ceil and ceilf functions. These functions do not set errno nor raise an exception.


Sleef_round, Sleef_roundf - round to integer away from zero

Synopsis

#include <sleef.h>

double Sleef_round(double x);
float Sleef_roundf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of round and roundf functions. These functions do not set errno nor raise an exception.


Sleef_rint, Sleef_rintf - round to integer, ties round to even

Synopsis

#include <sleef.h>

double Sleef_rint(double x);
float Sleef_rintf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of rint and rintf functions. These functions do not set errno nor raise an exception.

Other functions

Sleef_fma, Sleef_fmaf - fused multiply and accumulate

Synopsis

#include <sleef.h>

double Sleef_fma(double x, double y, double z);
float Sleef_fmaf(float x, float y, float z);

Link with -lsleef.

Description

These functions compute (x × y + z) without rounding, and then return the rounded value of the result. These functions may return infinity with a correct sign if the absolute value of the correct return value is greater than 1e+300 and 1e+33, respectively. The error bounds of the returned values are 0.5 ULP and max(0.50001 ULP, FLT_MIN), respectively.


Sleef_fmod, Sleef_fmodf - FP remainder

Synopsis

#include <sleef.h>

double Sleef_fmod(double x, double y);
float Sleef_fmodf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fmod and fmodf functions, if |x / y| is smaller than 1e+300 and 1e+38, respectively. The returned value is undefined, otherwise. These functions do not set errno nor raise an exception.


Sleef_ldexp, Sleef_ldexpf - multiply by integral power of 2

Synopsis

#include <sleef.h>

double Sleef_ldexp(double m, int x);
float Sleef_ldexpf(float m, int x);

Link with -lsleef.

Description

These functions return the result of multiplying m by 2 raised to the power x. These functions treat the non-number arguments and return non-numbers as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_frfrexp, Sleef_frfrexpf - fractional component of an FP number

Synopsis

#include <sleef.h>

double Sleef_frfrexp(double x);
float Sleef_frfrexpf(float x);

Link with -lsleef.

Description

These functions return the fractional component returned by frexp and frexpf functions as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_expfrexp, Sleef_expfrexpf - exponent of an FP number

Synopsis

#include <sleef.h>

int Sleef_expfrexp(double x);
int Sleef_expfrexpf(float x);

Link with -lsleef.

Description

These functions return the exponent returned by frexp and frexpf functions as specified in the C99 specification. These functions do not set errno nor raise an exception.


Sleef_ilogb, Sleef_ilogbf - integer exponent of an FP number

Synopsis

#include <sleef.h>

int Sleef_ilogb(double x);
int Sleef_ilogbf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of ilogb and ilogbf functions. These functions do not set errno nor raise an exception.


Sleef_modf, Sleef_modff - integral and fractional value of FP number

Synopsis

#include <sleef.h>

Sleef_double2 Sleef_modf(double x);
Sleef_float2 Sleef_modff(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of modf and modff functions. These functions do not set errno nor raise an exception.


Sleef_fabs, Sleef_fabsf - absolute value

Synopsis

#include <sleef.h>

double Sleef_fabs(double x);
float Sleef_fabsf(float x);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fabs and fabsf functions. These functions do not set errno nor raise an exception.


Sleef_fmax, Sleef_fmaxf - maximum of two numbers

Synopsis

#include <sleef.h>

double Sleef_fmax(double x, double y);
float Sleef_fmaxf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fmax and fmaxf functions. These functions do not set errno nor raise an exception.


Sleef_fmin, Sleef_fminf - minimum of two numbers

Synopsis

#include <sleef.h>

double Sleef_fmin(double x, double y);
float Sleef_fminf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fmin and fminf functions. These functions do not set errno nor raise an exception.


Sleef_fdim, Sleef_fdimf - positive difference

Synopsis

#include <sleef.h>

double Sleef_fdim(double x, double y);
float Sleef_fdimf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of fdim and fdimf functions. These functions do not set errno nor raise an exception.


Sleef_copysign, Sleef_copysignf - copy sign of a number

Synopsis

#include <sleef.h>

double Sleef_copysign(double x, double y);
float Sleef_copysignf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of copysign and copysignf functions. These functions do not set errno nor raise an exception.


Sleef_nextafter, Sleef_nextafterf - find the next representable FP value

Synopsis

#include <sleef.h>

double Sleef_nextafter(double x, double y);
float Sleef_nextafterf(float x, float y);

Link with -lsleef.

Description

These functions return the value as specified in the C99 specification of nextafter and nextafterf functions. These functions do not set errno nor raise an exception.

sleef-3.3.1/doc/html/sleef.css000066400000000000000000000032361333715643700161550ustar00rootroot00000000000000p.funcname { font-family: "Times New Roman", times, serif; font-size:1.2em; font-weight: normal; margin-top: 0.3cm; margin-bottom: 0.3cm; margin-left: 0.0cm; text-indent:0pt; } p.header { font-family: arial, sansserif; font-size:1.1em; font-weight: bold; margin-top: 1.0cm; margin-bottom: 0.3cm; margin-left: 0.0cm; text-indent:0pt; } p.synopsis { font-family: Ubuntu, arial, sansserif; font-size:1.0em; font-weight: normal; margin-top: 0.3cm; margin-bottom: 0.3cm; margin-left: 0.6cm; margin-right: 0.2cm; padding-left: 0.1cm; padding-right: 0.1cm; text-indent:0pt; } p.footer { color:#808080; font-family: "Times New Roman", times, serif; font-weight: normal; font-style: normal; font-size:0.7em; text-align:center; margin-top: 1.6cm; } pre.command { font-family: Consolas, Courier, sansserif; font-size: 12pt; background-color:#000000; color:#d0d0d0; margin: 1.0em 0.5cm 2.0em 0.5cm; padding: 0.1cm 0.2cm 0.1cm 0.2cm; box-shadow: 2px 2px 2px #888; white-space: pre-wrap; } pre.white { background-color:white; color:black; font-family: Ubuntu, arial, sansserif; } i.var { font-family: "Times New Roman", times, serif; font-weight: bold; color:#a00000; } i.math { font-family: "Times New Roman", times, serif; font-weight: normal; font-style:normal; } b.func { font-family: Ubuntu, arial, sansserif; font-weight: normal; color:#008040; } b.type { font-family: Ubuntu, arial, sansserif; font-weight: normal; color:#0050a0; } sleef-3.3.1/doc/html/sleeflogo2.png000066400000000000000000000544071333715643700171220ustar00rootroot00000000000000‰PNG  IHDRàÁÝU0PLTE(*'786EGDSURdfcrtqƒ€“•’¤¦£·¹µÆÈÅÖÙÕçéæýÿüÄ´½\ pHYs  šœX}IDATx^ìÒ!Â0Åñ&\`% µaÀ8 ƒ! Ã-hì,vG`ÁÔ“Ñ©e_@4¤)æýôËSAðK€ € @ûÐW­o†ÂXæ“4馃éêHA´—¥hé¬s àk .ÞQ €Ï#ñi˜S£Tµ‚«\ä¤RbJ—š8åC޹S$¾éeÍb&k bž‰t°!'wéAFÌÞÃelˆKú°íÍÎÝ„ÆU…a'±*Jå ˆ.Š:»pSnêÆM%Ý–”èÆ….~¤¥. 
#j‚ñ£bhb0 M)µ) M,jýBƒ†ŠÒ6˜DIˆ6ÍÜ›´ÉÌÜóØ&gæÞsç¼'÷*AÏ=\BÎož;0òÅÔz¼fVÚHœž«_)…B ”¼Ö@A)t‹XßÅÖúV›¦•žDCÐLÏŸ¾Ì3uG¹s= øÜ `ÎL¹óÔ´ÌU¡(R\À?ÿŸ èÎ 2쉱€öÝ:€Áª¯å¾G¶·…à]|èºüg:EªÌƒgüëÇ_i§únÿÄ£ÕJz v èÆºã8jßÉ5iV:ÉXæ×Áj¡-}€ïçÒØ-Àô’{i­;§ ’s!\:€ðÇ_Ê6Õ·õÕ ¿V²xq¿þb›¯ïá{¯—Íf9eñt·þ[®¹]]ÓПø`—Þ࣠×|låšmÙ¬p›ô{ØŸª²Ë ÀÜË"v;ž>».÷µS´{É÷G‡û^茨=¨›AÍ,OGN÷=ÕPõÝ~A‘2Sˆ$ý_FŽì >ÙCµn‹hð8!/tê×ï‰J°nþma¨öí#ayyŸ0TÉùC €ãš§çb’ß¶;€¦ìE¤K‚Bmƒ®ê>Ýüõb}]ðŒU_ 3@@~,ŒU €@0`^@U¹ÃÔ¥ð~húŠÁ‹éY¬·¹ â¾|€r/ ä)óªþ`´%õ_ž‚®=ÔØÝ†[¹êæ¬S‡ÅÙÈ/ùQõ8·ø |áF;Iµvð¾%Ù†˜.Q¨ÝHP? –  ÌóâgÀJŽ¿€:ÀpÕµ³k)Aßo7©¡–1$¨bý,€˜µ(ó,€8k±€XpÃMS­-±àUjèF$*o°,XÁˆYÀ%‹DÐå"Ì—.·qÏ÷V$ê¬@ô³¢`y@éY, fÀ¸uÛÔü6­º ±u™§’ߌ ÀeÀÏlþĈ¼Å"ðÀ˜;ðþ0ï¥÷X²ˆ à´ @é±öÛ, Î9€ X§q¹€çÌç˯jp–ðšÕ8Äø›Í¢,@ª¹µ§n MÊóð Õwɲ(;8—­–à_V ˆÂ(ýâÄJ““¾/‘z¬i» Íú‘T›MŸ2(‰’ N¦=€W¬   ??:RkxpxðØÈ¿ø»@ŸS­-†-`-à"Õ7†då¬.s–…ÀÀE» r `0Z£78xøè‘ë9ö½ÔâÝZwrH›™/±2IzVÑÅX±X €Wíol €³wv±mdU¿M ›ò!™"Dx#!!´HámwÙ‡ˆ§*âC+ÁC­f BK>$`Næ‰ÀCìq¶Š°“ÝmÅ“=Ûmá±î¦Õ®àa6l +»1_¡ù°ï0{|ææº¹ã̹¹×mþjUOe'jõÓïœsï ­Øö¢ãòy;oC|W7²$/¦eý|߀îRTÅ„÷ë`­`{ôØ.K¯Ä;|Ã&~碙kG à¾ÝÛ ØLj -³èñÉ¡!(àDßiÔÔ€î9|6’øt—ôðNɧ°*Á‹ðç rµw5€oàÐÁ7 »¥€ô/y;R–7ä,ê€ðÜ´þÜ‘à; ØH¨–í YKP‡_wÑÃ/ÞŠ7uï]:"ôàn߀î¸rkE,c(“`|p2ú*Çûu5 {߀î9|ºëФ•ü=³å=øR“€)[ܪô!X àEPÓÐÝ’àM ÜQ àæµö€Qðà3¦½ßs¶(rÆá-",Áwô7àÚ ¹-Á€tT%€´Ô)­ù¬a¤Ò†aŒÙ¢¬Ôå®Â ˆ»!P³2ŠoÀ½¾Ýy•^ƒÊZÌ­ˆ´ßvñÂ?ïD䟼O[ºKøº£øt×X±!y{%‡OàMq#ïažÒ¶t7Ц`ÈE ÜV`#Ϙ(wYê À¿éoÀm \—`ÀfB€´µþ2iX¶Ÿ€0í(2Óƒ®€®Súö€M Þ•`@w\€´Õz…×´ý,D°P‚½a)þ:B;®½Ý$Þ ÿl|ºXµ½dýÆoº½ íÇïÈ9Eüaø\O0¥#Ú÷€î ¾éˆ®«°™÷ýH4,[ÇqüÕk À¯DÞAÒØ€þ¦—¨º Þ—|Ù€ÿS -¶ù³ÌyZ*D†ÏÈc‘‡æ!M{@Þ Á5àž+óx§…ØqJ-Wðìñ9ÿÖÛ€<Bp è– …å—ÀJ‡8@. 
>ÐKãëu ºÂìôhÀ‡@í˰–‡uXú3z0xy Àè¸4}oÀ]UT`óàPÑfÀEÇK)TWчòBÔ­ÖGú°<6 -CÅõc1%Ø·€N>|}é@4䩨›q÷€ýe@8hÏû´{0¸ÈXÞ_¼ŠÑÎôø4¿¿€ýÚ’‡Ô€µL ‡ÐñR .LËÎ-_vãg¾G^þ££éææfÙ€›^êiÍ ä˜pÁð \ź).úRô6¨R›°qï‡&}Àa/_F6`㺗ºéu§ÈBLá \h_øOz‰P„¯’p†"ÎïÑÅ€PTO¡MÁ€Ôg øæéáÓÃWTHËN)€ ’â¦`á*4hLy¾†t 5úãÔh ¨K¸bZÇð åF €ö›1˜XvÖêLÁc"á:8˰»Ýøaòqñº~¼+ÀudBÃýŠ\£ ЗXÖ£ rB!7-„£1[½þX…%ýzÀ-m ¾$À㪠زüÙÙTÇ€Ö\f×3Áçã‘^ OEZ‡9©•×%pI†Ϫ2`¹EŒÀ€œÀEL€4×0VÅ '®DyºçI­zÀ›—aÀ¤²4 ˆ_L­¬pÎ1› °–=;é]¼„òx@ÈÇDCä7¼<£•çñ§`:*Á€Í„"VA€–Oa9@á xßñ™¬íeåÙ0Ÿ¸1¢¤<‹oÀ&‘`À]¢Æ€Ôq‚Î-^ 4}˜Æ2"s0€¡f/×1öâ ƒ1¬¥Ä€Í|w‰Þ%j X 0hà@3 8 0œÕËk(7CÎÄЖŠp7?oÉ0à†"–Kùvå4-ŸC¦ÖA¸À`X¶É­¡<›òdp›àp]†¯ª1`Ó)·_¦Z ãd´ ìØ.XÁùiXÏÕû©ÜàœQcÀªÓ1×Xxoâ^Õ„m¹&`øÀœÿž8c0ä[ýdÀ« —aÀQ%¤N) a…Î#xDšs”‚}g_†’Ý®ê÷\¬ãœ‰†|±Þ?=à8¾›I l$”°æäš!M_fK°Ç 0éÌ!±§ÈuÝ °à¸C$ð.QbÀJ‹Ÿ ´m†îl`” ±:W¦CðŸÇϺ÷€ÐÄ¢OÁD‚7ˆ ÒÐ1„±ðk1[éhüÁN¨Oy/_ÆxJ4›¾ÒÜ ø\’aÀy%¬»pÐçAÓ7ÁÈ`@hÆ8K1ã¤{>|¾/zÀ|éˆÒ¤–(s–Š»C“M¡„AÂŸŽ±% ásâ»u S<ú¼M$p›¨0`Ó)uÊ®Ðb+êt·å?v š½§3¼¤Š¥@POàãokßÞ"øü‘`ÀKD…+Ü‹™jC¯a¨µºuÁ É‚÷Óxq;ò‘—t7à<>€tT‚鈶Kè7€vРL^€­ä-à‡—Ímrÿ ü¨®g} ¾E$ðOD…-…e“ö…íeÒ‰u[æ 0Ó4ÂE»¸¿è6ô,9 O¯élÀ_tÒsрЪª0`µ`:\f3Až5-Ö€¼m@0à‚ÁcCî rP>¹ªoønÀ[D‚OT¶jh–,È0{ÿeÀe®sXÃZà î~0dð¼®lú²Â‚w“߀ï¢Â€–ÃÒ € †ùa²›Y¹ÅnÒŽ×^"fàÇzö€ô—„`°ù,Á7`u„(1`Åghß™ûÙ‹ÜQ‘Az&4`;N¸Ç);œ ¡«?#ØÒ·À©x|k„(1 uBš@4Ýy>ã×`Šö}4§­Ø'SÿHù‚n=àk?ü, LÁµâsIB° H;BˆÖZË@›'óY» %›ÏðÇÜÝÔÎy´{“ø|jM¹†!Ë€§áK>J q HVœò«Á/çwÏûßO‰+Ž[ÀWP»Uà‚Íf^ø4Îáü[D”Çëêz@>HòA0à‡B ¾™ Ò¶ÅXJ&:/Ó݇à#@À”ußþ+÷Ðù)åÌš  c æ‚c@>ª Øpœ"2 Û¹Ö~Ç8ÂÂm„LYÈ€/‚«I"ÊuU= þD‰+mŠÒŒ¦²¡bÜ}† ¤Ë2 90rŸŽu.ð "Ê“õÀ€{‹iÙq@UBÙ \ÊwÐ Þ:7Ñu„ÎÅ8ý"Ì·„<$l};gd ‹¤Æ=ŸKÚ´À…̃¡@ú"Ì÷úƀǬúu”[…Y0lÀɹîƒc3Ű˷€1ok„Nw@ä ´BË*·HÐ¥ wÆ›üYp´e§Œ; ÄAëÐk6¬*ýÝj‡»Qî!gbÀV†ßo›pÀEãˆÊŸI ¤'e–ɈªÐQg0œ- ¿ç°UØÑsÌxѸbš!“à•²)-«øýˆ6Ó¡{ˆ!¾,bvÀÏ ùBÁ7 f"@ÖC@¨ªâi#‘¢µL¹`Æ(€tÜaÀ–>€esÚP0‘Âñh„7¼7¾¦’pS‰(³! 
´¬`op¨BÌ(«þë‘,â{ø‹bL îï¹î€×”ˆ2»CU‚(­4¸ÕvÀ ­‹9ümÊˤy°=žI¥Fœu@1 .Ƨ§†  (‘Ó:)Èà€xë€Øöà°Çv×Cdµå3X§DLë¼±0Ü-‰óXy³à-ä^: ˜—ÎÐ9^DØÀ 7%]7æ€ÛFyü ²9àß ƒ(&ÁÓE@ ý ö*ÙeŠ1¼¬@À2Ä Îj޹ê€bœиB> ¦Oñ×15 Ÿ§ XްeÁhH;ê¸" ž%&“[°¦P;{˜ÚS ÐÒ<ˆÖ»aVl`ÒaD\PpµlXó »R?9LfŒÉñÝð8 ¼z„À›hr×±\’ÜͰdÞ¡€p4,d\·fCge@¼á¬Yðy謢 SÂÔŒ„|°7RT΃&p‘@¼á¬8ÇvÖ‘Ïà&LQV`hž0UWýt;¢}>›!ñŸr€ø¹I·°¹—çe—3Tì =¦¨•Áõoй%Tcr@d×w³ØÀ°eÁ[ø¿sÒqI+n(L@ȧ=€u¾ð:'€8îý”Ó/âvÀu¸£29š· %¶ö³Ú~õrݾzÏOá߀@t.c'óu@¸‚ŠT²jÀy@€B“‚)Ñl1`;à ’¥‡Ù²`¬5W·ÀÇ)ÏÙ°Ðu½ß¨.Áü³aðõ±ˆÐ;Á耷yPè €ôdʲ U|ÀQí~¬¾Ù0—PçðSTŠØ¼ˆ‚ÃØÚ]U`‘º Ίê4€}nð~6 s8gÇà"*ElYðžÈQDŒË|Ï8à« çÜjºNrŀʎq˜]Lˆ’©ï¦ÀÜa³Wm…€iÇ4f¦jløOoOóˆ¦>ob(˜ãxî€E)€eK’kR8'¾¾'àŠ‘Ùœ`ð:lÌ‚ŸÒlON;àú€ùSXƒ‰KªmhN¡¹ ¦VŠ@Ìå€xÓIÀ»"ÍbQp@¨ìý)§Ï‡ë;P¡³²$LD°¾‡ð_k Þú¡p Ë!€aFÀIÍU½ÇésÁáÜSQ¤ÁZÕiì×kÉ?Á vWt[î¿Ãø1êïN; ”¬Á ¸¬Ð^µ \¡FXWor¥¼Cñˆ1¤ð¸f¾ßilï-«³§$kp! 5Í+xwDêŠ$g!¤]µN8¢Ù´ØãsˆŸI;퀭=_€§ã“à·ìñ5VËQEk:“7‡Ã5@É9z*΀Ó¸ Œp +°5ÍÅ—÷¡5êÐ“ŠžÕ™}îªÍ ¼Œ›$ÚÇ4»€u@”Õ,ÛS%P˜µÑ¯¾õúÔ’*V9è—€šûÕ}ဪ›à-Ä©C ½‘8€Óð‰XWmè+8 ÷$¹Ç¿¯#òª tBcN’Ê·r×bÃ#Ä °¬ÓŽ'§º–€×P‡ŽÏ‚‘ƒXTHî”æŒM¾g#½E –Kø.¹XzÈÜ fä°ÂCŸMÀÒ) €„€õª@è¹®ÒK\[ý€öYÈz’}²¶žVuݧ”S™øÿÛ·àún`wÀ¢Î)8¨¥—÷(ÏÒ¡½ñš`-^ib¹TMY?$ècbx±-w±’Wa`Ùªêû§£/)°·Â‘»ýA\pÔ ²,ЂÞ=÷*<ôP2b#dZ¥ ¸jÀº€½*½Xƒ ßò‚i(Mةʇ&A?`Ac-¥ÏHw OmÀ5³²„2´M-Ë„îSùŽÓزS¿1MÂBg!—i'Êyª™Š€ÐXü©*¨– ’IðB@›š“ºô$mmý>Ñ  ¼Óð–l¹Fzˆ2À,a˜ˆd•iù\þ<<¡{°CÁÎX ˆn,”ÞOÔõ” æË¬ @|&”–/náïˆ<úQ(šˆö¨§ž€Ø .å HB@»òåbüÎÞ2Ž\È*Tóš÷3žœ ¸*@»à[Œ¢ßµRÀµ€ã|í;ýGc^­úA›Ž¤bb^5:ïš žR=ŽK@RÀ¥M hc’ÇûìvtxG1SÀþ] ZÞU H)(ÚX…ä›ÖÀþÝ>ûgÊ|Ýh'LHÀuç& ¹a”# ÆÎYùæƒ!‹Ï€Ë•²b^â[ÅÉ¢uÇ& >fD¯€—ÁI… Òú\óÞLИ¢˜›€F'`Ò·œ€x/óXŽ„ÎPTíŸü5 Ð+|—I øÚhM°ìàüÏÞä6CaqÎÀ `ÁK¶4½@ÕŠ-JS!±Lk§°ca›Âºã¶HIáÍ„@¦;„4á¥h»©g<µÿ—}ÔÊŸÞË›y¶g©1¬úŒ¬‚ý¬'ñèEþªøØÝ¥TÇ­Öæå?Óêö}»åèCÙêCkÓɧµCÿôžÕßð¼B€c­å9€†«Ò+̲z?~ò´àíð÷ãE÷ÿË€kuh¹cÿGûÜËÚëwž‘ÜÚÒ4VmZ¿—%»Î Þº¿Ó½:üðp`àöqF–9N³q??½Bøð+Mc-¨µÇ6¸ÕšàVÝòpóÒŠOÁ/¶-ú¸°F} yPS ®g켋fI È7@Þ xÐØ„(ß%˜ëhºlÔ S1"jÿ£ùˆ&¤QG€ü(ô¥@Ü[ø&„WT‚å€:– €ë«_:@9 µìUØ)%˜nUPšÇkTèb™¯ (i¾+æ1(Á^2 Wvã5aþ@º°Ÿ.©)e•KÛù.ê<2à  !:hè¯MGE˜ˆ&€öP8ågð£h”`/ùÅ~T¾ج 4ÔôÈ6cc:k_`â¨üRˆˆ÷fâp"¸S@ògðuÐKÉØn™ÍþtÔ;“p@%,[:_MtÐ}0)g{•T€Q’€c#@é¶ –†®:® €ù àÆ"€¢ e9*À"ò#b-(…rÙƒèâˆî´rÝ|¸ g.›`¥W®ÙY 6  pZ€©©‰ðY ~щ £„,¯ü…s`žgY6LÓÁ 
ßëi­»ýI Ò4e“Èsø¨ @å¬éèËdGNèe£tÐK)9LÌÿ›ÝîŽÇÇép˜eåÞü€?§ÛöT&î 0}a¹9àõô4””’QˆŽ^<þM@ãàxºü»ö%X¸+À°Ô˜Fƒ~÷Ÿ=-¥àlœËD)­´ML :Àßìarã6…÷´šì&»HâÄl çhï9Ss[äbSKA²àD$Ø¢ÈáóüpÕTÉ.ûó{ÝXõ–@ÐdÓ‚Up©—›,=¢®…#¥ÔáÊÌ*/ú æÀì@Š›XŒ"Ëß3F¥,zGú¤T:F&mUf­²Ïgq>Æ¥b;ȼó‰Àbóì|Q!&N€¨ûQZcg†3€/î(ÄïQq÷±K4>‘Þ÷Ñk(¹wXHœˆÜ –B™®#(œtG!?ù(b 0žh¹Þ¶©,‹â5ûT-à§ïuïRöÏhOfßÜQˆ€ñO4¤åÉŒe“ç¦V–þã¥A)5‡”ÂýŸ Ló@gß?M¢o<¬KE`/DÃCqd/7:m‹Gûµ“QjfÙ‘v­0 g+7üâƒÎ}¤ö6ׯë‚£;µGOê Hî§Û"I0]y—…3€úÀŸñˆ™ö»^-Wk¿a¤„Ø£Ÿ®È©õ  ›Ã|ñ ϤoEÄvë5+Eaï¿Yu[á à·€?ßF¦œ"€´$¢ õ.›‡Ö3€nsëõ[ %à¤eÚªÂÀn`°–>Oªî>ަº–ü„èâÐ2È«,³}òt|%A`P$1’0H©Sï•gB}Øù/¦à›ÖÊ÷7Í—À‘¯¬D|gpˆØõ’úJí{’‹jZV$XÀà}Ò€èË· PR(¿Òòz]T“0#­Hž_³-Ùxd¨T?ˆHlgP_NÆ5Êãp›‘^„À>KÀ$J2¤ùèÛT„ÈàƒÄE!Ö Ç à7: Ï­oþ€éÑsDÈ/ckñøJ³çf&ðõÀ~ G"vµÆÅ¸ØPŒÀ74”gZ° ô±?à7#…Ø6èÑC(ꨮ +ûAXy¸H[ëK²N¡•8«ð3ÆØKPŸŸóZëü •©•eiaL38µÍ†òpõ±1ÙÇ¡b^PÚ.„.¤ÄàWr8¤†/€-G[»“þ²,ªÖËejÙ»Tƒ¤0¢/žÀfCu¸ú8ͲÓïbU¶èº•ðˆ«¶üùV~iž?ŸKĉ2`(¥éÊÚ!w;ðt‹‚]¾0È\ºø¥Ÿöõ“/ì%«QÓ7£Þ×EOä{Ç3©ç_wøn‘CK¦–ZùÄ~úTn{úCª òAþ¡MMb­²¬ýnox™¯”ÊV«!Œ{Y¤í/-6eÏqSùA¨P0ô$ŽhµZåy¾ÙªŠê¨¢–{¢ aw¤ ¶¸Í¸2X²$0HAnÏšõ–GD"sû ¹ø7sKuŸz*Od.yx‰~«‚yÖà ïIô*%ÊfÿbÊ``Dwúš§ËÌògÂæÝRÿˆºOð©À÷ Èb¸øqýÊGýÅÆÐ%Áå ÂZrŠ&ò{#€S¸dIàö?]M „ä 1ÌÕU yîs¿+OdEË‹ˆ¶òã‹^ú±$T‡Ø q!Ø„?€üE ¼TïíàÝÃDâ\oRN˜X t²—2@ÚÐøñ·%DE¨|}Pý£{AœBã"¬6X±ô@L¡{Ð5¾ƒ3˜†ï, zL³¢œ€É Ú"P_ÀûžxuMo(‹çì ¥Ohâ脲 ÀŽ –± ü{Ð3k&É Û5£RbõØà»ó%~•/_Zi¹½J¹ Í '¬0Ê4V‰Õ¹”qxûõ§„Ôc¹½Z¯Ý Š8q¤.K oi€¿,þCNºÝÊÁX|^›A 8Wº Nø³ ÀW†áÑ‚ü-þK?·õêUÙz0ºt ޶7Æcj5øÞC”þjŸàÞ%ø£ÜŽCEžoÍ Ú×Û–Áwç$vYN‚ѯùjßÙ! 
NÞÆTúO¨±ãÒ›ë–pÁU:91˼uOpo‘|ÚW=ˆÂsw°õí‰pÕâ'0¾7øf«3þ&½ù[ü›¹ý~>H·$MLÉÖ«`â•°ëB,€ÄJ€Ê·Ytö wÛ‘ª*L`KHæUÓ¹ÿkÐb;€Â³©õå&iÓÃvÄr6èKJÖkìJÚï…ÇFߘ çï÷…ÕçðÖÁh×jG®¢x6 ¢wÿìœð:ùKàÁÈySv•€²ØNAEn|Àv¸ü¥Üf=[  ‹–fšþ ¿ÄJ/àuÎgðt<âd]‹Æ`þ\Ø:|œ}ô9‰RÑ‘8 @>~@§¥u!‚øs`sìê× 3H£…’\ò­ tW/µs@ž v äÏðâfêþçTl‚ÊA@I:Íð½±R(ÚzÓ²ÀW†¥@@ô$…#®þàoò *!ˆî‚@pˆÃíçc@Î…'µ|WN ðןŸÞýÇtËÁÆ(&ÊýáKº8’@"ƒ‡Ðo‹¼=òévâª6] 6ù™· ÒÎÏŸð»Èàt»81@'°7ÛLŸA’j€gm›C8  »°Òµäï«3@bû`ʪrs¾ ª„A¨ˆ)ç€×•Á¿tà’8~›r9H¯+„È}ó‡ÔùìÿÙ;Ÿ['b ŒS t¡' §PA²äÙAâ†4c( ³ €xzw ¼ ¢²í8–YO3וV+å§ïóüñd À““WŠ+7-€;ûÕ>–·éÆ?¿ÎrýY`=XN€Ýb\„ú§oÅe2VC®5A>Ny.Z›?·4þ&&·ä„B›œ‰ ?%¿—ZäÏdp‚A’À¨—BylLv`?ð4̤îÕ ÿ^ ÂõKËŠ ‹¸üÏ· ÞX•{@žHÐÓ?·²DÊŠKcò´B OþÄ"ÒŸ@ð¡ô¦8y$ä4D?WÊŸY± t‰ êG`Nî„(wC˜?A«L&:Åà8½–)¤! ÅßâŒûç‡OE¥$T7"§;P”@%þÜ&}ú¼TÿÌŠ ¤†/j2¸ceóñ†Ô—À혿euЬÅkIšH~ n1úÚbÑU`¬O'A†Hz†¿ºÖßfüÙH·n+K€ÿÏô*¡X]FPÀ0ð~ÀéøØD×WN2à×FWMŸŽpz]GäÇ:Þª,k'p¬=ZF‚ vÙUÐÇdG´TŒ¦Öü¹n´Ä°2,#!q‹ #ÚRÏð0·^¯]‹ e$™Y|l×!Fñ&à‘%°¥þ-:;αŸ 3ð‚ØTC›·Uöû6ågŽg^<~ÿ<ÓWøÅsY1An}ÍíÅÉgÄáŽÌJ`ßÐÝò¢€_6OŸ<ºÿb–w¿|øàñÝÌI~%4@ß¿ÀŒÎg¿™;ûضª»¯'}AÛ-Úh€™Ö ±)BHtCMͦ‰i­5Цj" cZ@Hm6¶&ü1Ú±GÔþƒ%Ó&5˜²¤CL‹]š¢½;MLѦ- M…z}cdMs}M\û<=ÇçøØ9Ç—Ü{}m~BÊÍíkq?ýþ^Ϲ:ýëþ¨€¹È*‹ç@m+ñÁ~+}ŸTTu2èiq<¾(É$‡Öwÿ«fÀ;ÉGÈv„¼ö ÀwÍ&AßǤšØûb¿J0c¥ã -€úZ`º^üõ9 _ú®¸}œS‡Y%Ì*“2ËO¦ÂP2íþó¥¿i&°Œþ|Ê ‘`v€U‡Ž’‹X§®mónI®e\ 3‹°pÈ—=’Å•P‹3Þù“ÞØZ$€„I`Úßö¯tÀ‹Þ„²Ð…à3ÖìŸL ¾ø ˜­NXdúÿÀ¬ƒHë€Àý{Éÿ® ¾ÝÛ»! 
àŠÑ…©î@Íøúβ«XÊQñÀ¼‰!BÞºönß{$²"SWÉÊËŽ)ú,jø§Ú}ŽÀ0úéÏ¿ @býäÌYŒ—?€ÚUòåy ñk>kK±¨sú• „2Šs€M§z€uì`¯ž‡1E!±êÕ*N;C'íÂçO8N´êç©ÕŽ¿^=ÝqGÀ,-²¡2ÛvÅYµ › =ÜþR¸õGŸVî{d+³T§üð( «>­bg;XÊf]*í¡B˜²ë@YNûþ¹ y2°–0;^í5 IÜÇgq'`”kŒÉAåï?ü;iÃP±_%þ8Œ@ÝZÅ“Î$P>"=–raR&ߒȉÅHflþÍÄ=ÊŸ>$jgJ)ÆESø:Ðl•þ¾€ ïŽ¶Ñ „ÛY™p^lš¥Ú•Pn&Šù`‚¤nVôÄ.|*o8@P£N–«Úç!#z=ëí‹ÅÜfÒîwØ~ †ݶ ”asw›@§L3›‡å²êˆãƒÖ©‰pKÔ翾&$Ž5P–gÕŠüq´œ˜—NX»šcKo_<î"ûÖçtÀR¸GHå}œCGp#Y¾w€–è6†æÆ*`ÞTôÁ™Iª(v‹I%¥?¶Ç5V¼¨:au;çÞ¸»èO݉mbqN A“€å@€óèÜ,•!ãÚ…–|ÆhC0¯ð^Ÿ`PráÊ3 mÕR-;ÊN°#É!…@EÒºãF_<ÏFe™ù"BJ H< dÒ¿X¦( Û«€@KC&øir¾›*£0©oçIùSÖO(ºÉ„wètLß;þw¾$¬ß-ܬƀóè¸@¸é£lUÌÑü¸¡ ˜š3Û8é¹Ìk¡Œ½"nëKÉÝÙ…·úí¬=€-å¯MïS^ýöa¿qMF_’—ÌÁfÑ©1Ë€2 T<°Ê`|øiðó¾ ½‰»´«Y `!¬Œ-W¬` C¶ xr,¾å{À鯬Xñ¹ îFv|5!Ó7]~G±“÷-Ó\ñ‰Ç87éÁ-wŽ’üÏ®]õÎ ³.Nm´´Å_€Ð˜¨Íž|p…¹âú'ÅÿõÁG7”ûé0 '¦Û`¬ ]?ùÆþÂO| Ì‚<êÖŠo*l û5€ñÙÐigC`+}uÆ3éº.žPtÚu‰¼-ÝìÈÞbî¶Áú7€àQTÐÅ€›(M¢*8d£€”‡Ù)`άðá0»R”šíAÑáç»ðù}o?"†·Â À7ØkØ1® 5"zhlÕX¸Æ/¿F0C»8m‚5y|µS/%ë å@¦•Š'‹¹Ù†HˆHH¨-Ã0ÌŠnúX KtY°D`™m xèÁr7ÇÛ¸ß.ü+ „NàöðBŽ!hÑÑîÔg~à±¶Ð#@(ð‹)rÆÃ"‰ßp×WÀ×6Üõ :ŒXö ¼„tê—Öt YÿëÓuÚ=01N(ÃÀ¿*A {È̓h¤(N„©1 s€ýEjš-­ÊÏj±Ï‚óa ðMêjMáŸBѬ(ðq6$ÆÏµó‚8poÓÄû@¼'›ƒÇq¥¼ï³'æK¡˜Å|nˆ!º Ƹð-Æ“<£÷?->T|bÊ€ä¤z$°¯‚?G‰Þ˜= „L­’Bh9ˆ6”t5Ù) #J(ÊÛÏ‹ÜùXBÄ0,&ÐâbQÀØDÞ§W(DMBÀûµYð»À„Â_V voe×ðA >C\X˜z×@u^gÛ°½fÆj’Zd©òò"akU+Z±Ú+ 9(DGÉqoæø` ëÍt’9>6Wú€mì_Åi†í0ŋǖ¶8 UZœs#Kæd è¿ùŽ`Æ€’ÀòDäQ×øuWÆnÖ1øT¶š &ûÏAì°¡ºe¯€Ç˳àR%¨•áM¡ˆ¦€,{àWfе³l¤p Ù§,ÕÖ fé‹tq=}„]«nú î´\HòêD·[JxZœ‹ h««$!ŒâôY;TTðŒ à~þ¢|ñ²ù1á2²E’‚@æm_xÈbΚŸíºJÛ ™—×ÝËǃşÓÛ3¤žöΘN÷’ÙI…À˜7Ü—óúšÜ=(Ú­J È]ð<ÏBöÒ»ªÏ‚e+ıž¥xs-ÁònRV hò,$onÕ*àAYîûÿºG©o„f&“~G€@ÇŘOnø¾x*=âùAü3€™1ª‹;L>¢­Ä¤xg„ ËﳪHzxyY­>\ÃÏZ8€í¤A6;ãCm01å @I ë‚`wa‰}³^6Sé”Ç@ÕwÒzZˆ½ åÔ+`[õ ¡‡• xžThRäï鿈Ñ* ±oÛé°ñ6ãd‰ýj3x“c•@@ýæJOLx©¾|P6Â|ZÄ€TsÌVb¯€Qàj›UqgÑ_µ8º@_çU€^ɰî|Àª¢€Àsüœ6l¼:ä0·H”¹H²Ú3SÕ¦ìž%^ìX X®Ä€sX'¢*¬'¶Yð¼\“¤UÀ]Ò*àEM/¸‹gªʉeÏ/©¶*n? 
Âü÷‚ýßjË^•^WÍ”"è8°ˆ';À”LP±F³7L»è¾bÈ^O €ô XˆbT«€s2 |-†ì ÛÀUÕÖ–ÊE/×øÐ n@£Î~»QÔ`hÂé^K ú}¼ˆNb³žÃÕBS–m Xè·X¯€s@F«€g˜\ ”…f[ÌwVSÀ9 $}õV ØÈÒÌ!ûÕíöµ^Ô"+r¶±êUâÙNP©µµ)À3œ¶%À¬ÐJMü¸b¸_:M#S©€D°Ý±@óa;Y2Õ°†‘ZjdÄ€nŸzçe¶I‡RëM¤ì½j³ãÜâLÿ?{×ÅÆÏì–Ýf L-A¡Ì¢@é¡”’^z©í5èêcÑ…ˆÀ¤|-‚,¢«[ ”Km7Lrµ z±Ù&º`*etq­yèâÿ¿îîôË7gwFq¡‰ ö{9sΜË,û›ß7çÌ|¿ó›ô‡ì£„òlÏð»uÆ<ä”y6¦§¯üµà|nu6ëA†WU Æ€%ÀÙø¼ç3 ˜¬¼õI2 Í˜vÞ7±2B²y\…«ý èU´{T×õhÿ¤ªÃÄKÝÍñ èÕÚ‹;*³ØþÓ«mÇétÈ·¥T²ñ"YmH*˜?‚?ß'ìÝoÊY‹Y™k6cª ᯌüV"=•LHIBd9,nãý#Þ/0^ Ö_fýÃF<™t:Ò†oÞ¼ FÖ¾@Ô¥öJµÝa•%&Ç])…(×ÓâIò(zÛÁ®ÀƒÕ6C&Oò¹îM„o_›ô oý/Í©6±¹Ô)ž H­t¬½Ô½KQ D°ß{r™=Àì!8ñvw¡èW7vó_`Râ#ø7­¬¬þ  ¦#@WtQ—©˜L0h-Écñ0_:µ‚£m§ë ÅE—I3«{~Ì•:¸l„v¸“CŒKúÆ+I!ÿݧ dn·=!"åg¨›i0\`Ù§ÂÇ3g![eÎ9©0ðUÞ*H> L‡ɤÞÌ0ËgA¡€íÔpm;Š˜žî,T¼Qup¿J XÇÛUÂM»omãÇì1~hu—eXRm5"M)wEC=WEeUQXê³nñ’ Æ€ká Ò >Æ‘£¼7­PÆËS'ýö]m­áùüpº°œá`sÞƒ²˜Án”Mz{©^ºÌÛk{rÌv:e ŒèQ=©.v]ózŒQÒ2¶ƒ0›®-,áT¼qú+ßÓàWu̽BÚCæ·‚rë¤Fß`+ÏíâðÉÜVé6kf&7ª{Ϧ ß½¤„— ð1y‹Fq®&À¬-•• ë[¢Ó´’wk5-à“Ç¡{ÂL«—¥R….0¨Òm!áç»ÇeåXÈa+Æ[D($fB @­¦ÌiàmXIŒÔ\–¿’jßò‡L ‡M'X䜋D´Ø•g[$xùµ¸Tã{÷óEÇqük«”R1Þ º·uèd&;tˆÂÃzé s°¥[]#»ˆ7û×Ö¨‹ÙBÕa+èPbB“„‚¥»‡Hwçù6ÁÖ3„³ó8=ãìç½€øâ³3°»€qÌùüê ÿçž0³3ýèŸxy“#Y›®ÙüÙXˆ„¢ ‡ò¬)DòãøZ@The} ê=ÎZ@T*3Ûš.UŒcÌs©2ëÑ5åÙê` QÇ±Ž‰^¢Ôõæ2k ѹa= úJD©Ö"q¹ý`žu€È™þÈÚ@„ Bˆ!Dˆ!Dˆ"DË%íß@oÔðÃƒÑ‘ó§´ p^NŽæOsƒ€N{“| p}ÙƒWY¹Eà ¢Vnˆ–Ì;èK °‡©©ÈªU rkãFÈISs9Ñ…?Þ¼åçF8€¶AtHù|bþv<°ì•[#¢|¢ÞÏf6¾®q€ áVˆhO}þ®øÎ—b7+Ž X53_™Ù-ù¦J¢‘¯.P5ÀŸòÙÓiò5ÆnŸâ¸€kä¯Èª½!¢³‰ø¬‡ÜR¹\.köA:ápo}ÇO’[‹{¾Œyä/ÀÅ8´;Éíèï³fK€ª x1ÑyåVíEiåà ¡®D\_߇kjÝóÝû°G€,ÞwÖ.ÙJ¥•K âª“~œvÊitm5@§Ï<'mÙ²æÎ¥6›¢[lãï3˪ƒ•—ÇI§ÚgöËlØÇI8,®E°€ëžg[Ú#¹ÞDZ¾oÏÕV#ZÀ!¢V¨"òÍÐW=·jß.pB´ êŠ χ˜^ °ñ[°¥°ê8ࢠ˜áÄ|"òHp—Ú,I€œŽ÷Ú€¶Úʰàò#¢TÉò,D{ hm!@¶îgYC- ó\Ý> Èo±€ÁzíùoŠî(Pàú6ZÀU•kÀF]@~1»´<yrvÛ, Ý Ð+$ž (‹ó]°<\þ¢îê£ë(®ûì{zOrDÛ5c\Ûy–›”¦Ø–)Ð ™8¸´,9''RIzÊiAVJ@6œZ‚„Êp°-h8Æ&¶äô€ $ÂG’žÔ¯Â´´Äö³eC`oc[d›­Á²ÐÓ{»½³÷Þ™·Ï+ÿùæ½ÑîìÌ;¿ùÍ™»³ €Å§í³àhGçš´ù™Gš›:ïÞ 'ÒÏ| ó䄞vž…'Ç |ç±5?¦(ç{|Çã}qµoåñíMü$ ÌñMæ¢ðížÎ-V0çgìI°¸×÷X„¦ÞÙäû¨ó‹ž†`NÌáõMk¶r<¬B¬ÌêÛ…N»2gÁãFΫ?ú¬„苞[GÔð²) ü¥€ùw-«KÚ²Þw-[d¶“Êž2ÜìÅ{:ï"Ù !W‰2rgs¤ç[7Ìý ’ÊEu¤M@X.Ó®ŽdÀ«ð‚Ït¾/ ü±åÙ7^3÷÷]Gºk¥s2Ëzˆ­Ô›2ß•i¯Ti3ó1m•gw¼ÖÙ ÉÿŠßàuœ‚?˜ ÿíêª]ç 
!fD_Y‘Gá÷Æk£84Å+¤ç[×Ì##òù*›Ëmt‹T.‡Z¤.‚b;;ïéAŠ5öHÏËêÌëço„‹ 5H…u5A¾$SLQX•ižIäh?‰=>{ùsºÃ€¸Ë ªí%¤”€C^ºxï?¦í’ðÈ}¿ó~Z±DI‹ïy ®"m9 ß/’ó‹É€CU ø”…\BKÖ’<fBczÙiØBZ(ó*•v¶»K ¸_p˜Oߦž—´w¸_^N36d‡J Ø—È¿ùå•têYSNbC¸X=—Ïéú jÖþRŸšâõ(ýE–—?ªð}S装å”dŠ\eÁGÇ=]p;DßZ*Ý;,7Ÿ1àßW-v)ÏμÈðLå74në=\¤m?Ç/c¾!Û£¦HhkdÏ#cƒ[lAF2àöª° ¨þ×¹OÙ5¾kJæM*н#üñaaÜæk-üóýŒàœ6À]"½Ó-¶á?î›Röô ¯6WxrÿÑIš2zS»=ùî0e!Ò­q‘Ô”¼ô=Sð>•DètÈÏhšDµ€Ã¥>5½Rz÷EªÎòV5«pjâ*Ûýµ T&ŸCtƒž`àVmIp¤È ¯€Î;-#¾¡p;hÑ#´ë<¶î~!’ë 䜓(¡z—¸â™GC tY#¡ Ã)$ŽdÀ¶0:ÝÞSS¤åÿ%ø¼që¿IèÍúDLf‡©zÓâ=”7Ñùç¿e’nã¦gîC–rßZ·ĺJßæ‰³çiïÙãõÛ%î,”¨M#é@ôb;@,Ë“ï×%eäDy°9¯Á¥%ÆMžŒ Õü×½•C‘-®[c5Hñ€í¼Ó†Ä*ÏD¨}¬ÂôRa uˤ…cB¶"‡`§Þ ê>˜Q7û¼ÖHÒ@4ß? Eµ"8 ëfnž„2@v´¬2oÆQ38-ħ%ZaœapŒ|;ývCaSÛ=°‰ªŽK 烪 Ge—@»"Kù^˜X+Û€ã²Ål)þ«edyk7 Vn“s?ÁRˆ: 0Ï9vH’¦’6w¨ Û$aq]A‚*µr5&ý=]Q‘U¯ÓPkq›UøSP¡'õ>ôö-fÄ@E2à°Hä´- ÔZžÌŒQ¬<RB„šuª •ù‘š»Rö„V^ii޾ϒ«¦efÁ/ˆ0BaÔe–Èߣ‚D<ÂÆŸc ÑENi›n5³·ñŸâþ#åâèe˜}BœGoTP˜aK«n!ÄLnGMä,øîFƒÈ®¬)6ß>O¦\„+Ž«Ïáï0Ç,¹‰TóH‘°” m|š{Ôòè´YpõîÝ»‘ª77q£¹Ÿò”·òRz›Vk3÷tK›išIí·vñk#-öÃ~ äÛ!ü H¼[ëCä„p)ƒþjl9À1s6‘£ÆMXšYÒZâ7¶º¼Üúd­ÅdI€ë:ô’É< XÈ0hð¬!Ï‘!Rw/V‹»D–j,Ñ!án£j-TÆ êÝ}•6‹©S§ jBÁRð Ž©òÄêÑ€ÀÄÇæ•V¾˜TÌÙŠ]ÝÈR>­!4P¾æõ(™¤5MSÍGÉFÿ„3ÎùžÆLˆf øAü7|%öȲó*ˆ® §Ík¨CË–Xn6 Ä5>ÊöƒÃ]¾CS| ¨oõ+Jø¹™TH÷Zâ$ÅÆ ‚)h< ̛}ž]‡Íëâ0oŠ4é'bÀ}% ¨Q-½µ‡ðDìG¾7šŸVÍÂÏ#Pqm†GÔ1ÍG‡ñ߀f@FÉjîøÕçW9¬Bd̰üÄ kŠ/±%MÀ/²6ÛÎÐÉHɘÀS¬WÕ€W£ k±î—US›7?v‡f@ÏÈÁUTp¯økîÅQ$ଠ•ÝN éò ¨óA¥½GKB X³yó¦[ H¢°@"‡¤Fh <âÍB¤KŠ%«ÐÂ!x–*a À€ùL2K 2ûsø˜ë4Ä×4—wÞµÌ$ùÈTAÅ€Kð³¨í4ó£éŽU—°in¸Û"‰U~½"¢]È<€!a½øjˆc@N;LÈX Á5J{W£…f”c@z)éá° ¨OZ «L3` æ§9¨ =‡*§°;Ì€î‰Ã<à&t‡bPµ{kÃ[q¶ãŒÔsõކm@Í€m(¾²Þ3 É]£aŽéÒ &\ "þÄ®à½à‰Œb@·…ƶnßœGD<rx?–5y½ƒ= 0 £Œ ñ>b@z*@§ž,~ˆ!Þ†v‡Ãó‘ X¤,œ–æè½àƒÁ!8Ì€zâµ_ ý£-n4C%rÙ]>þ–¦Y¸ÏX¹ѽzMk;íGÕ·êtÎ[™xä´¯e&e2qzIÃPW$F}6 YFS1à”P3 ‚! 
¦N«8?š9àŽíŸ HOå*ú¢X8û$d¾€ý÷«° Èoâsÿi p!þB˜¶µrýÿW3àQ¬p¾dM½ð¨)&Ë€Øp:+Nø~[©Qre¼az} H–‚]¥tK‡àKÑ$à@(9Ë€Àöt$ò2á˜aGÐmòÙ€d¡•aÀþÀ•aÀA²9xÉ?’ǽ*_±îX§5Ž#Ô†“¬`ç7KÅ´»ã©ò21ís`@"µZÚH)ç8è³‰Õæy¡nQÝ¢¾0™±DÁ)5 ÄÚ€P·'Lã³0,Û€î(Vj+½‚Ýh°›@Ê!lò²$ ÙÉ€¼÷ýç• À1b@m¨*ÞÂ팛SZ ÊKvNœƒ H»º)Ü n,çx È€Á(ãÀ6`˜ó8äoæoÆ×ìèY0OE{1ƒ²Þ0ñ6 p~ Ò ?ÕÉ€nž|Ú+tÎO³J[}TŸãü«†!/ž=×”K,ÇägÁS¬´O¶ˆ*«ŽN÷1`45 = !ÈøÒÆ0àñ¥ÂXé_†ad¿ øûr ¯–µ»!MŒ ˆëâôTpRN ïÏè÷U™ èxç>Ÿd<] nÜüVÜo ‹g!Ùþ™*Åœ82öS‹MˆsdÀ3âë“y+®?e£ È£QŒ È è ÏH,戯hp?Ü ÖP3¬îùqŒŸ#€Òœà0&aÑ¢fÁã4ê¿ BT0»–rÝkiÖû"eXd@ ¥¾ Ó“±Û±bÐɤ™Ñzš3IÐížO½ˆ“wûQ Hx^_ß Àc“˜ŸtK÷Œó§™³þIªðTµ£íŠ‚‹ ¨‹MR&n<• ¥×o€Úû)~\0!£xÌC®hâÃÚÜ9Y€³±šz`ûÚùÒ0fÈ,ý©(°‘ gÅí„@mTN2 €X:ŽLäºe~”´”‡]± 8ÈânhÈ(™OÙHò®ö,˜ © ¦&Á€ã8w‰f@FŽ4†jˆµŸ;†oo(o’KšÓ^¿-<k÷O¼Wã–c@F¦‘tc8Ê]¤Xž ¦>‰üám^áµAÀ•ñÇϰ ø1¤à*Î#ºäTì$Sº\ÌD2 ·3’G7­Nù"‰815Êf@Ò8SCë€,ä;‰4—·©#Ë8YUÖ܈±ôÑ6 –$¦Äð §ÑÞ0þCÿ‹²£ô ø»YépE€Ä€lŽœ‰J; 2mkӦТ\|'v!/øÈ¦"«¾‘ØGj®æ–VÌ,˜ç/þJçƒÁ³ZJ¡€mà è¶)!ˆ\YœEUcÀýÔ°Â6 îEx]p»#h+8a7y(i•MãbŠ‚ù {P =†…#I´÷£¹ù°m­xZ¤á5vD/¡I»ÛêÆà¦›C#TÂçR§7)· „‚c’_çä Ÿ§þ´â=ÃöÁàóTéš{B;{Ã7‘ÅTh&ß+¤à.©*‡†ëóNæ—ñÌï–Œ™ˆZt е*êˆÞV7ˆÓ\e”QeÄǾ„™ÖømB‡9ÙáUœ¼ÉÎêJ }¨ÂÓlÄ ŠY•@V‘¸ã'Útc¿¢G“n¯éœ¦@‹k¹§¯e‹ÄxÖu^Jù•/×ù?…é„8ÛÝG­eñÎsÜ>&Pl9_{{Î"˜ö¶’FN* 4+ºÔN—©ùV=iq{W—~¿¤1t¬ŸºCq–3,ٕο¼Ïí%à2y‡ºyˆ ,E2n¶‹‚Dc$¡¬¥” ›ïÞ[yþ€'vÃ^¾õ3Ý‘œ'5üENÒý×nuJ‰Dÿë†y—? *Ü}˜Ý!­ÄÆ[7,§³Ínløâà÷]. ÈÐ¥={= ]yØÇ‹÷)Ÿ|&rêq²a¿³¨nñzŒâõ=®CY\¿=dCÊ´(ßPµñà\¼¿añÝxæÃ7ëßf˪/û*©•£O Ö3â¬8¿·¨á:Èè—{l׎*S=ÚPw¹” uzCÃòguí-jkær­B­Uò|êwÏÿ·w÷¾aÇw½Ü‰j‚D$T:Ñi7 ‰‚HÔ ¡ÐµF)¢Ôˆ†žÒ[èÏET»Ó¹ànžÛ0ĺîñä÷©§ØM¾™dgîfTÞœ|äEߤ—æKÔ)1Ǧ”§…ªû*¢V$çæïc/rÆ~>ªM~YËfà|ö1mù-¬ùqÈYü~ò”ïÞ[¿ØåÏñô5NxåZ¼µA¨È¬w5ý͘ëCe¨‡ :É Ä{ «¤< $Y£ÂÙEP‘ØÛþX ºI²õ¶2ÙxjÂ's¸ÍéeŒ]2µðjríþy}Ê“¡`¼´³Y4P“ç(— eWâÌ“d¼åÖÍT…·Ç·÷·V›gKöâî†ÑÖ›éQví–hvÑýdêaç†K¥ÞÉ ’îa¢gÊ @}lå!ù’[-ý!@@€ €¼,‡¨\cIEND®B`‚sleef-3.3.1/doc/html/sleeflogo3.png000066400000000000000000000517641333715643700171260ustar00rootroot00000000000000‰PNG  IHDRvp÷Ë0PLTEÿÿÿŽŽŽšššººº°°°eee¥¥¥ÉÉÉ”””ïïïÛÛÛ888yyyPPP3Y]? 
pHYs  šœSjIDATx^ìÙOHQÀñw’»Æ¬–7—.uˆêbD"¤‰â"ˆ‰Ñ©tafQ’Õú;xˆ ¶Ôl„MÌÕ"¼äØ#È“›ž:­¨A§vµÍÙ˜·þ¶·tVçsØËêÛ7óûÎÌ¢v W¶È}­žÈl Ûn €†«›Ù0¿v« ö;€‘ê18§O‡`±_f$ãz;è†dY> œNY¤„¤srA*À0(äY craîZ3€IÙÌ9[®BJÁ@‰Èo:ÌHA®‚á)ˆ³ çIa>Y2€ÉÉ`º’¤Æm àÀ.à›x™ ÍÕý}“Y<;ÉGþc¡ à±rvâs1¢GHR‰u°8Ä=õz*[Ž©Äðu„Y;;i…¤ÌUîfð†Œ(N§t|9óœÑµ;È‚>lF¿íü²Î™™‰&1€!ØGk8Öy3Z4IŒœÑPóë¼8ÚGQ Pƒ*Ñ͆À õ3o(Æðø®uåúW'¤Ùþ+yr*ÎPÚ¹9£}N †/þÐP©AÿÃ*ÞÇ£çq¶(@%¦£õÿfP<)&œÐmóüvAÎ&šÑòˆtŸ¡8ôr€ ÆP<é¶Š@¤ lgŠ¢€é†ÔÔÉi‚te+Y&Ô«’4¿òÊÕCË#´À›ð¡— 8ŠUÁ>,úàÑ}Gã{ÏÄ¢½ŒpJB×^OOŒ©(äµ* 3ÞGñP%¢›*Æ À•ºÎÇñ€Є?@>Cž^0¼<ŽP<Í­@O¢û09%ñûÝn·×[“|õû»#f7ð–èJ€¼BŒÜ„<ÑÅìHKâ\ €¸Ta0ŠìÃÔ”p‡»Ò««mÛPãéž €Iø‹¸ 0z/Ë„(÷5Á‚|õ å±Q& QX8€2 ؇yÐpGmk«>ú–òò>9©¯¼Íëöt+A:à® G€ðà€Lô|Ð-$GÃ9 5ŠxCe €?Bm4;€àöî74Ž´ ø|õí2irÙ{WÒø.DFqÀÃ0âwpKõr»n±WŒûa冃é2TȇJŠ‹i°î|Èí匦MY!ÔžYÿWN½Sh5 ¸pˆËz_Epæ?ï&³3™îû$“y>mËd[òüöyŸ÷ÏÌ.— ™ä^i5q-É0ê—VÚüOäœÞ½`Á—höoqa|ˆâ˜H€ßMp7´”€ŸM€¿wÄ6ÊU]v²O“O HR}á>ŽBÀ“×ï·ð1.ŒðOSV …n"€³)ð?I°”@Á:Jh§¬;yV0¶q$S’äKÎ8°Ño‚E€ÇÎ(€äm¤¿òO“ÝøÕDO§€ö\H€þ ­ªî¦_´E•ˆ¹¾°ßLé½sÁR2€&ùT&̤Àß0 šÉ ÜS5Q­ £˜’dTÀÇ€;Á"Nœ­ÍdÇ ’L¤0m%¸• êˆæß0ŒROUEUë÷ñˆpHÆeX´¬½¿Uœ9þ-vèßìø«GíÏ¿Y¯ŠªêP5'@¾<ZÖÂ:;Óìø]›G`ýœ$µZ¯UžSIÔ°ÑÉ€"¹BøUî7ñsõì ;€3;€B÷Hl”u©%Ú uw ˆ­‚ä 0ë&è:þa‹(¹vfø/°@‹ìгG pS—šbÏMzýºÀŽ ƒ  “èß%üæ²°É€_eÀï‚   "zYŸ÷Ôâ{€º¡óçÁ¬Rñ×d ße0`Ú‚°CòïÕýµÃT*zS˜_€ðl OgͲÈ[ìÐ"Ú8ù'Ahñ#@¥R½ŽjA`/Å^~>{üvü";~ rQMÀtî>qc(ƒø® “!€M«¶ÁРÙSƒ¬}ªWê.»~ÖÛ ÓÀøx-{S;€³ÎX°ÖÏ‘  S䊀Xa/4N¸ênö vÓ P7u…?é‚>@‚ ¢—°ó 6€?%Õßìð{ìŀ߃€–«RST‡H>-ZBUè »ƒ“Vxœ'éªàçFü«?bp@Û= àÞ,‰ôõˆ v£§ uæNÈøh‚¥B7{?2 ã#€ïF½Íàtç¦îú©7ã½àö€ºâPµâµ…€§nvx7w##4`0Ýï÷?`}e9X¢ZoÄ0Ã9€!Ô~‘ì È}?yÏø½k£ 5eL)Q|ö耹ÌÙÀ%ÝÒýàøcâvË:)4ÜÓ`1Za hHé½ y¡Ór¿Lš@8Ph8-~ÝWm3TëUÝôzÀ¾W5Šl[ƒùnªSýSÐN¬7Ê €†£—‚Ú(BаM¦'ºhrÉSî0§PãÅ«570®¬êØ$èò¡|¹°œü`´— €™‡~“Àç=.2@;Æ}PËnÀn±Ø÷Fè"PÉ éïa¶Óƒtwv½– Ò|þž ù¯† ù'Þ°U6$²`“\㾪yì¸ü+n5 ù§×™×f‹O&ÿ‚rÀ-vhÀ 4€‚Û’E@;͇-Ø1Û@zÓî’/_†ïŒ7þm¶³°`à,4€õ²î èÓv0R¼Ðý£‚ûCNú õñûÀ«ÜçâÉÙØcP°Ü€¶*ºI @-ÐW5’W­w€94´l:4›ã—€ÜPü-öm&ÿàì£;´È ÏXPx±ª˜Mï ¨àЂAmîÀÐ:° ((sÏ‚f ò é`¹Å]†ÎFýØ7ƒò]v–€¸»€ î-Lò©ÉÏùY%Aä((ÝJá˜á[ÐpñÚ¸SÁ™ýjKþ¦‡ã0m±¸`r[À’è ö-¿õûÄuº(Ù ‚áê6ý—},Ÿg½3ˆ>)2{T';€»Ð@ž´€s{Ý–DuThÊ^¨^Ç•jÁø% »@î·í`Àóò]X[2$©ù8G²ax§šÁß`52o´ñÚ¸%à6·_À¯Û'À‹ž±@ 
›U2$‰3ýõ`»ÿ^ØÝ×0¢Q©T½%@?Ìf€ iÜp—;¿hŸ³ìàMÀz¥ê  †E ãË@î! chPm™h ­"®]aëi¼}ÿ„ÈØ<ø `ÙPý>^òh÷ú¸ Ȇ_DÕ–Þ8?JKÀüx ÂÈŠøûwO€3;€;ö@Ê•à$†5o}?À¬»é'%à µ¦ˆ#ôllŽYf¹H¼s¥}ì¸ ß…@G$Ñà¯à‘“¿èÎe^ôì!ä-¦‡ Ç?^ogÝfPèØä  åJx3€ 9 )˜ˆï•hçGÍôÝjq…õT§þÙÉÀd—À›€W@l”+_"eŸhù{‚1Ö¼üë.–æ"›Gª&˜ Æc”€ØïʽÿRÖv9fèòPëdM—¶ä5&¢³Àù ´p@。j›.‘ây†§…GâW¯·3PXdðc À÷8HˆÌ0mJb𙬑 î…š‚åß៼gŒŽñmßTx$}½%€mŽú<Ç`ª  P&·vùu!84zXó;@¥`{dh`(/s£#÷~';Sv?·Ø Os`èô€Nâœ×Ž #@u_þ½ícÍ6KX [ÇK ˜ìÆ xç…¬ä_å˜ü°Ë±xÙ‚°Eîmé÷Ït·¤ÑIþIï@G_b:GßÊúöÿ8V…/?映¯Z(€ÿ³w¶1ne7G¤±ƒ oÇ®ÍZº°…XÍí¡«‘äôC”J Qd‰)ð!*P\"ëê´!€¥ ¥´a—ôdàˆ me!"”&ˆŠA¸á]¼(q¥“ˆšÃ|«’TÂëÙÛÿÞ­gf=3™Êý¿Úñ9~~;ó¼Í3fikÙqCïø€âõ!¯ßž·Ôæì*–üCä<À¨eù‚§hÊ~XYÏ!-<Šëè`¥Ê-<XN€k°þôÁ¡"¦¬X¬Äâ3¡ùQÙ/ØÊáúÍö€ îÄ–€cªrð5 ‰ð ¡p©¨BN–vUàZ#½%¿0KFÅåßÑ€‰¨ j7,Û#ÖÆ'”eÙûź¯2x¿{”zŒÉR¯³ yÀ^(ЬU ìÒm`ЧjN(`kJ¬Ax ÃÙo«` €áîLø‰ù€VÁ ÕílOÅš¿¦"*|ºY{$ €øp ë„NA5,p’ø€…°†ï9 š©„¹ÿjX*WÉëPᵄ ÿvˆz´+ ¸¤B}XK`( uÃúvXü+»¤ ²?”p‘Š` V…ÍfèúuSK˜%ÀÈœXŒÛfÖ›ˆP«áeP…µ£ª‡-€”„D4ý:Ã|X€%V]AäÂöŸ°Ѓ“íD s€0Ð"É 1k@ö‰›@îâ/Q²Ñgü³µ¸€ X ý€Úˆz Xpƒ Dã{}Q>š¡ê¢ÓA_;CÔžó±GZB¼=öyS €_À'»ÑR\`‘“þÉöŸ0 LÜW!.¦äaD‚2ï§z‚¹+ZSÁ'žnɦ‚Ÿù’X&ðx¤3§Ÿm©àD©Dbü0ã ÞþCÀA à¢\ìâC~HÝ“ßæk-§ä‹AƤkÇrVpH„º°RÿDE ¯ÒpQ?ÈÇìHPHÏ}ŽFÀg4÷ì–/›ùjàuÕ@ƒ¸¡ÙÚ01Rÿsê­]ÞœÅgÂ-…HPLæwZ´%@/«[òA§„€Ž©`²DúûGBŠI+@>z¶…ÅJˆ•ùÞ$j‚bºÿwý ¸[sOàAyÌŽ<ÆF%À@à&]@øy7:l1DN-`$7PD?ºA‰ƒôpTV¾'ð…–"¦,ó»¶ z|^ØçZtÍ$& ¡ ¬çþÓ·U\ó H³#Àª–<æ% `ÍM]¬ Œ—H»']‰2p%d>v–âê<´[£# ´]dHê¯0Si[ùQZ @ÞÖp£Á]…½³m+¡<Ü@1_í \Ô À1a ÏÉ sΰH ؤôõÞdv(òÐó«p¡cÀ €5-y^TÀjé³ÈD'¼ÚÖ‚ 0¾8nïÙwmd}ÔØJT'[H@î²^ÌŽ<ë`ìQ@Ù‰î¶í¸4 ZDÂUžÁDÉÊ%†ÏëÀX,"6+àd ÎÝL@gÁóŽG|yŒ¯,`H Ðna Žž’> Q0ÖÉ¡¸ä €çÄe "…ee?6jêàe<¨€µª¨Æ(& A €¨`í—4>v?(·ãö·JrHT2è`Ò],{€ìŲ˜­× À“B×.Ž1qÈ*–íR\€ÛY Wÿ€áÐgõ`väX·XEžîå€E-`\hAòÎÿÿ ‹6Å€.)`²ŽwY´RðNW SJöcÞð'-@å0Î.†aÑð‰Hú–â’€Ž=À8Ôg¡øn+-¿M/ŒxI“ä!e$dãŸûó߸pá÷õF­ôõ°]Õ1š.ŒîàЗëØ0Š ¦QÅq’íë9oz™™Xü—Fݳ®ŒˆÕ?®qJç‚ÐuuŽó¦u­[À‹ ¸oq\5×nÍXD°Œ,ÖþTü}€dP¶®€áZ­¶M€ïu?£) À{ä€ض ¶ PíÚHeƒÝ#~Þ:À“«ßf7‡¢”_zR s’ä@"Í„tN¹J÷Àÿáß»$ ÀpÔÊ æ@Ò^ } ÍÏŸk¦¿U&·E óÙ½{W¸>) €y×Àê€tƒ¾Ø’®àò­s¼„%DÄ´ëõ€!ñ ' À^ ä7S0Ľ^VI9ØlI€s¯ €?ø Ýœ.•»Œ`|œ 8 °¡’xÛ¸Nžk>C2¬Åï+€Ù’yíMš0Nn-cºs¥_€À0 h81çCwðÀAW'šÂ%ÓôåL OŠ€?uNX:·O7“ 
«6%pÜÁh0ßf~ÓOQé}ƒ€tS(>å²8ÏÃ@Â\Âù}QàNÇe€ÀÛ㼯¸¡ž€å~Ê-àºëp´@€ÃXBDÀßËmÑ À‰É­U°00Žvà@Ô\gÁ³·ñúýÞó¸" n(Ý$ €qVŒÉê 8ï…[¡’Q f„Û©S¶Ç'ÅnàÃòÃÀÜ> ÂYK;Ä@Bò² ÏxL—@ v¿Ée€—vh¤ÉÛÅ1ŸúýYŠlT¦®Sðyy$ì!¢wÝBb׃WúEe‡@#95š_:ö¦šø 4€(·×àöÚ7¸ƒëà¿«€ÓÈÁ HÀ±k‘³þCEn9ÖUªVã`83D¾¹C€2·€lÉ4²é¶ØŸ³0džޕ4-ËÆ×Å0;<²~º›Í¶0¾Á2Ž-‡žjÀ Ìô` ‡‚Ù†-r7°’ò€ÐaTy8ý(Š=G[ágPàW¥7gˆêâL½Õ À¡^O8HFØÒw Î0ÖäÀë {‹´0Ûñœ €+®›¬ûÒÄú ‡ {Ù ˜â…ŒSŒ ÿž »-èó˜+ €¹‡@·}ùŒo0Ä©&â?«€­»·o€SAåQîÖNÙ8{ÀÔ¶4äõ€N|•{þ¾W û¢êh\óêXŸ—ŒæpM=fP p)A€=€mØ¢Kq9àCœg›è/>ÏýÎÕ9ÇF†|†÷…Õ—@n_ª GûüHb}¡Ø® €¤˜©}{Û{?36õP<÷{9­ÈËþÌÞC½¹þrª ÇúYŸÓ Dô7¿UÀ‰RàâQ@3ˆ=^€·]Œù  W|Îâ{"àé¦è(V Ê}œÎ4csdcÖ7ÈÔSÝ‘þÖñŠzÌÒ®N, ЯX}^-]_°—b€>Tgl¬ü‹âͼtZˆ€wð@[¾Ð/‡Î§UôNú7à ;8…펖 tQ9$ *ýpŠÞøD9•UV€ñm, ‹ýí÷óð!ý#ÿ#vP»yiŽâ1ÎÍtpÒèÜ—~ÃXD®bÔ%HAšP1È%N…Ž×&v¥ ï ä8Ñà_jl„Æ%y^ÆA¨˜Ü{ùaÀpû ëèbf%/….Xš+’»ûfÐîÛ P«M…€!ì| o?Ó9‡²û¿ÛEŸ3?„>j`u‡R¯Ã“ÇiLz&fbæ7È}‹ê €„‹ ýÊ×Ò˜ÀKqÙ¹`oÌgæç ?'W?ó*ž î' Ó&®á×3Њäu<ü@äéØ:Äeù¯fŠÛÑÏS\L8CŠ@G`´'þGÝÝ„6Dž‹ø»HÐCOº%PL¥ZP)Ò‹ˆ %´ê©=ô 1=ˆøƒ(¡ŠT„ÒöàAP*Jë„%Aƒ“úh›M¡‚šP R¡ˆÞ‚T‹J­ßtvòº™¦ñ]É&óeçÍÌ΀F/ ¾Ådµ¹lòî o{ €rš ô œï8"û:-"‰üÕ!zõ6ëгÈ-¼qlÊièØ”ãuh™O¦*€Ûâ_ÊÀ€þsÒìz¢ü ÉêXÌ0bìÿ-(Nßw1êù¹‹õàüÊÒì%ÒQàÊ¿‚ü¦.¤@€ ×™RÓô¢ õgS)‹€ÙQUNV€¶Ð€iü8ì„êC²c`7„Ø I¯Í °N€å@Xœ.²¬ûƒÌdpüïÚÝŸpnÿ#u¦KðöàU< ,€Ëã 91=l9µÿBÛK0V:mk¹ø›€¼ð”`<. |Ħïhw€ˆ"}Ü£@C%è-/€_ì1‹#7ÇýRL8¼L.L³…'f0¤ hÀpUð<&Ü¥É)Rl‘9¡l?p&îÖ…qR,^¤0v“*•IR夸ê¸@êp‰$ÏZ^K#)³c°~,˲{ ï§§ÿÓšƒ €Ü3 dò¢B€Öëõ/wßûþ;µ.ïý¥?Dj¼®‡°íý¿×µùJþéuùõnOû|mä‹ pV@^)=†öà.€z¬^›*+|ûòÍž¿´ ô,Fõ¶µGõy%ÿôÚÜ)cý§/À…þvI~Ð)P‘ ü¹þ`|x#r€špÊ€3E€§#óÕËóx›|÷áxyˆò{Z L2óº®10âg;í¹ ø?ð<· @Íeê ÀR`CxZ7»üa9€Üê 0%¯‚Æð`¥¨wD¦VúîP C¿ Žß¤)Æ6f¢’nC€*ÀÓ !<­E9šþ7ÆØú0Hd†z„ôYÐ<^u Û€˜wvwŽ?ú ‡ÌH, _sLáÈR ïª– Ð}˜Gœ½)6g€úÏeØh*xZ£Ý)àí}@ XÏ\@*!ù¬ÑàÐw€)( ªÄu Ì‘†^Ã!Àoçº+@T‰Ë@—mð<×aúVrJ #  O‚#À pñ "1•·M©„dp¬!Àw€»d€¾1R`õÀ ÐÕvr (-ÙÒ1mn ðƒ)€îŠ8ì*À¤ºùK†z:/Žn ðtb»’*b» ‘ž^3c€§õH€´@îIY>s°×.‘ži“g^€>ÒÝH¥ØvØE¡Kýák¤'lfð´ÎFB€×U¯ H±ãIpèRè¢ ¦í^€á@(´×_*á7ÍÍ^€=Ö )€Ì€îPØgˆ*‰›hžV{”d•@ZˆAáUÒi¨x0à\'}˜ºè([€àJ'å`A"#€óHA!3 09P ¸æŒv»xœn « –€“ê`Ï€’Nüä°‚Ï«Õj1,VŒår¾é ÿå^€›"ãc¼F^à ËÐU  ’#Qo\£H¢ð·›ºKWHð†Á€}kÜ–.Üà d0 p¥ƒ†Kd"ŒÈ3`[yÌH7’f„ÒŒH}¤À?'Ìþ…™¹ÀL`?é‚|e ? 
~mì¶ËB¦#ŒÌ&δEíï­y’ь¶ÒÅ,H’$e2€¸K¦ ]%@GÀýa0{P ãtØØYö l ”CD¿¯¤”6¤ ˜§-Àâ,X`¶·d‘QSÏ‚-L¨Ñ€rÙ|9ƒ€ròˆ}Õ91À…ô€ï ãS´@¾;˜ `év:ö9²Î"Rm_÷EY9JÍ…m”@*‹ïÌ@¶öã‚S |aÀcõe{ œ§À´ô‘Àš—ºéž¯~Þ÷Å^`Èh ×_Än: "=ŠfÀrÁI ¡s½{ wŸ)Hœ3 ô.¬®eØc¡}<”‘/-ƒÛE2´Vß.ß ˜e4È(ùp5¿>%ζ'Aûk\0'™í\1—Ê(Ð^l²–$)o׋‚—KÔ¯¸s`À> <(¶Roµú "P ÆËù©ð/wç¯â¸’…ñ}…Éw¦†{Q¢ µƒP¸ Ù GÂ7›À¡(n¸àÀlg—¥’gÃt4°™}†›èV§ª¤O.ëŸ-iÚîÃܦ¹Ó¸Ç:?}ß9§Jå1 ">ëœ(€Ý¿¸t €P¯Å>Ї:œ§ ÛÔ|±Ü-›òé툃eK}à5Õ—1½uaû·Àb’‹£DI¸¨ €´®õ3ðåù g¥{EæÒê¥ `K ¼cQnˆaÿ†@øYïÉNàãð&àzî{u“úA-+ïÒ¨e{Y ¼îÀÔÓ‚7À; @¾ÉÚðûš€Ç~øh “'¿VUŸ„txåt'[ùôÇ÷}¯!–$ C"(ûDš.Þ j`dF§؃›6¢@[•õöü ý1¬GÇ$ã1\ãÊPh‘ƒ<ƒ‚·€ê¡ÏͰî3tÀãR™ôÉIuH&öUڣȶS›×„»JGÁP¾Š7"߃C¬&½ ¼X €h0N8²0þZ¬ªoM9ç‰aX¹•oòõq¯ó"”šq©Ë‰Â`ýKêDiuùMðB4¤çщËkÕ÷ya¦&W×€ˆTàÓ:/"®’oÛ"ó2×Vfå¹u\7ÇM¾±ŒzXY&DàdŒ­V0„‘'Ü9Ôòl†6° wë€ê8c"û¶ËO–QæÚ4ZãH3®MÁ(PÿFßÔV‚‚;Àš,À¤‘žfý9áØE î,²cÕ`%6Ù á°H‚É¥?äyNy¦(`Œ‚i0 q·¸Ó˜¡<ö;ÀÓŨ&–O9u“*÷¶£Y @Äb¶*¢ø*ÓÇâï8§*!?V¦„ Öúƒñž€éá°óW­#ãÆ€Êû™8gºHâF¤‚CSEx1qà 6GO%>ÓA€ žp,vÐ6… €éä`“ÀÚý/„ßÊ‹Ôe”¸ÆW¨¶Lá˜g‰…8 ar ö·€Ønœ¯Š€á»ظñ·Ý¶ÌÿjÅlyóç-Ù‡¥÷GÌHL8œ„‚Mjˆm¹÷ðÛ ðîÐR•„«î1@¯DÆ¥ñiçáîO¬üXȶÉÏD_ö÷a[ø¤"b^xÒ Ž„„ÐY0ø¶¿°1ø"œxd ˆ.ÐóËÚùÏqók÷}—ê7ŽöQUFu%HEA †7},Ëéßn €¯T&MYЀ ¦r€?1 ßç@`~P¨…æHãùèŽßô¼×øºóšª@•Y¶êÞ ÐÀÓÅ@¢“yvªþZÇGïEøèbÈu9 ؤªNƒ¸ã¥ÝH  ss€çW÷˜< ½TúÖóCà¡€Ô“s¦q' W®•gòu“ôüÌÑ€â¸tÿþ¢ÆCßòKð¦œÆÁü àœHâå凸«ŠåÒëˆE@Øìf§œÄ“8@†,ɹ—«³>FvE5v ¬xвSq÷б]RlUº·Å zx„Áî;(‚ª@Û¸‚wO@d­3 ¿v¬6joŒÄÓ]D679Gí î7ðö–{ ª@xÀ%$(ÀHp(ÿ¹ÁO*¥ü˲?6`Í=‚d€:ÐVú^àùwÀŠÅö5^Pè‘ö p¦X NÙïÖ¦žÿªò÷–PþÉú¥®ªÀ÷Á„¨A|ª1vºýXÙvü/@U`K`ð^€N8cK+7‹üçÙ¿vãys=´It{M p9¼ Z/ÐJǨ¬Sî€0²OßÀEÀ%¸šQ¼9´Ž™éØÚðÇsYo¶§5Ñt4‡‚T—à”7¡™5cwP(ø®ñ³E@›&÷Å4%À/¹I}¥Ì?Oñ¢A‡öO8=kuʵ\pŠ€ÍÕz‹o‚"Û…ìÀ×-Š€¡u ›èÄãÀÊÍÔµHsÐþ‹k¹Dúgh¬˜MV¥ÀäE1¹,\nª@Q`¬GÚT× fÕ €Ô5 z¨ÇM‹ÿÊé×SøO_vèS* . 
¸Q!Çk8À •€“Äš­5 ëq€c†ïRW¯8 Ÿ^ú!‡]Mó 4áŠaërJ‡·XR¾‚= ã&Éù8W8À/ëkcºg+ñ§ï^% ºë„sÖ†Œn&@psnà ìø{Š€Ií' iZ7¼¼´Öëê»Ì5R'ÓßjifÖ¯ èÀ@N¬ âF ‚rìðŠÿ¶ð€½`ÒdñÅ`+Ž/@òš–«±nÿ×c`Ûv Á†EøA­Àu2ùí¸A¹¿Ãù@ÐîÔ™ÕTl÷°Júò¿) €'ª”( +_?¾µ®#˶ ÕÊ }Q QHÅ5‚àÝÁCÐ ç<1y{à\,Çêd*+7éy 9äÿ– Qqšê øa‚×uƒU×x^àW…i; @ýtÒ<ãô%µm.;Jg ùýX´3 ŸUæ<=—0`+P‚×2Y€‡-<ࢀ¼ ä] €øR ãhþî…Tß²ê73€!Á«¬ÕwøÆÿ áOÆÕñè\v:´Uû¼j‹Ò_3€/sÉÿø¥Ã!åd >%DMˆþðg.EðwðîëH0þ}f]¯öŸ£z¥•5aô{‹:0¤3T2 ·†u(× ØOù—} Ó`fO @Ôoë¬!ÿÚðçöt`·mÝMØÔuû… Ô… †aÌáiþYø¡Àb‡>àªxÔÚ@Öo`]€û¿ÕXt1`#œs4'@k0ÿb ?àÁB¦Áá(pâØœ-8¼G÷§ 0¦-iÕ¨ÇÌ; üòÁxÑù[€¿Ò‚Ð0ÓxXþ!Ÿµüãs£î ÚÆ„5ТàêhB0OUˆ ‡P€q€HY•ÿhHþ×åBðç´Ìÿ$£â~°@­l¾Ð|–a¡Oxr~¦÷§xÀ“1&Ì”‰ú—ý>(ÿyéÚj£âý3Ð+BlÑêsΉüÊÚµãÜ– ô«1€£xÏ€ŒM¦xØp¾ÜWþ1*ÞíúeÀ©ë@¨0kAà‡ÌŲ.¨€Msò02SæíßÝÉJ®Æ;ˆ·û>šŠù7µ#}ãx€˜5ÿ€4‘`:Xü½ÇhÙN´Õ"l l ³cG?‰€zê[à…<ÀÊ?z@Ó(˜¹çücJØ(ÀLªÀž ë¸zV¨Ö5œz@Íëùº :¬‘þˆ`Ðÿ;gà\MpÆA{7 ‹iLä_xï‹Þÿÿì]=ˆM@LeÀàpÁ`>èDáiÓ8¼ôŽ907Q6Y‡9Ìü‘3¦£ŽÄL4:0ð'¹ÄLá0ÕK ÅÈÈ@·¶wßtï 38ÝìZ¨Á]P÷^UõTå.‹˜dè”æEèüƒý®É?:Å­Þ”c®n‚™BîŸ4Æb¡>ò¯8"ì:`HP‚˜6¸ ]ÿõÿ¨†PI†€@ch ™´È>Z?àXžÍA˜ ƒ‚@€ž€ßÿí¿QŒ'å®%„43j ؂ݥû6eÑAþH1 „À†2€h29däÅuä•0P½‚ÆÅ@KÀVS]·7™/×´€¦ ã$¸«€cÿFþ?SÈhÝþŠåMªÇVv°ú¯š£0(pc&ûÞ¤<;Œâ È·ÂçLùSbBþWËR`ëšû™¡ÙVØ6è%¢Bð¦äxæve¸üƒ6åüFèQ¼Ê±l84io!¦º¶¡yûpš.vñ(ÝÀ°üš€‚¤©‡ Ïy`”@sw QŠ£8•¡µÔšMEÖÑžR< ^ "ÿ %Í×½ €ûoˆ¤Vΰ ¯« ÜŒK.ÿˆÌö_±8Cá"lþÇCF(×==Î %Z €òzØäÝÀ³í8@ÿ°i¶ŒÀ¬ $Fôç’=§ofQùã]CЧ¯/€€Í ô1ftmPñ¬¿pÿ/Š“Òõ9­ˆ´ZÐ@¶Øa® ßÈç °wX$ÿcZÅ;d0kå>qRº]¢4ÖTã5iFZ -5èLô+iýn%˜äEÀü]šêÝô?2´ÏטýBve˜Ó•\í™!žaCÈR3Èc;öC ɨ‚íÝôß'Fyfîw†©…åÁ¤„„ý ¦JïùG°ERf€u•ë fwLUð€YAæÃlKÍ ”‚þÊ?Ÿþqå0^ãÀcÒ=D:1Ã3†!(_Lïîò™`KX%ðÂ_þ³#£%eëxLš”@©â3{j˜Ò¤Gv€ìbO`• H&ÒkþQ²Íð_L¥ ±­h€¬0Q] :ð¿q€õÀ²€Q:)¼çl0m€Ö^ |À'Qù+` OI òúO@!€ª^@’KùÇ @q.ú6Oh+R’ø ¡A’w¤3* K‡²M€´3/HS"‚ùu@;ß w.ÿ!Š ïÀ«/Gq|ëô4:ë‡O¢Ow<¸Q˜PÖ}cåÌ È´!À6–EWQ@' SïU@ðI£ˆ!–E ¾G7ãøòqó€G§ï£ècŒèFî•FmíS%1 GÐ’ ðà€›ê( VŠÖôï SfTˬnü÷óÝ«ñÆN?ÍâšùÛÒIÿ>ŒãöM,„ùÿ…ê\Œ¢¿1þÿ‰œ?~{?ŠÎxš…W´2L ÈdÄÍ쟣ʠ *¨*+EàÛ4óÿ‹·sãªðQ ;¶®k·›YD‹#¶%E–i‘j”¥V1•#¹rE“°ùC!'‘³,b”H‹cꕈT‰¡iÚ:Æ`¤@IB<© „Ʀ£4/¿²B(8®µ?*µIÃ9÷Îcwæ:ž¯÷üðŽggîŒö|÷Þó¸ªþrû€Nß)€è  Œ=ÒòwƆG„¼ÉóÏØJ~ÿÿ>c3#–|ô͘ͳ·¬,£óSåà4¼Õáe &¼“ùs¥õ/—óÆ‹Ó òÁÙøN´-Y€@ N`5×äfCTò1Øöol†Ëûž˜1ëëà yû;3JŽê†eòcß3—»·GñïM戺 
ø%ö¹ûlP“ЦnV¦ËºLu‚Y3àH¼„3s$uðdªhôå†=ÝvòÞ¨èŸRD¸OØÍûþ:ú§yY| ¿¦wéAßã ¶zê1¸uJÛe3åg°ϳþ:ÀMÜKñ)J4rü[p‰.ò€—èó÷e Ö¨Æ”w]“~DÀÛ Ä=K •ø‡©vsC|.v7ᄨ%Éem†î ;îíÖé3@ëï¬DúOùýÑ{…iõ ~jˆt3’A@±5¼¶‹Ãý»ä—:Kü’9²FÙK¿.÷CÕ¢„lt#û’l±|SõTŠcP¼d¹ AÜ ?£¹Å]¸vÊm¹þs}\ÿþ#j>Ã’0!þ·ô¥WíËÆÖ‰ãߢó€wX‘´4}’• €Ó°‚•_Dìô8h ˆ»ºéœ?ì\b6v’LÜd0ß;𜀧30K|È¥ƒì¿8¾]€@ݦZ«X©Í–¼¢ÃRMGÇwìÇQ”â*ÿ àe9R6¶ÀÒÅ ÒÆÀ î’ âæt¦m]‚=E4ÎýÊ8íðÖã@ê ”°Z¢ÙÙÙül}~í¬£}yý§Ý`Òé ³@j†Ä€©‹ÎqMõf/¬àJEXËÙâ ™º÷‘‹ÓX’y6xÊZ³1RmCäf@šn„ªÿ}Tÿ¥-¥©†€«]¶F.´Hx€¬€J0_ìŒa&‘6 ð‰J6—s´_¼o…µ´{J´Ä0ïÃ… JXRÿyÒ?ߦ!@ à˜­×)×ÛVàãçd°ÀöÊð± L,IÐÜÑ€·º…>éÚþB0¿)r@¬2ga$߀ф,ª?Ø4ðc¶ ÷0pÞÔô6)û.ù@-Ù•ÿÕÌÏS'K[Cù«''ž¯¹Š*cOn Œ€˜D A'kgãÁ1ê°v6ŒþÓi4°þœ:h«ð´€:Ãlzc㛤<P¥IP±>õýÊàèê»?&<ý|œ©[ïòxž¾Ûñ(¿<“è¬ïV²û¹èz’MŒÀ C" ]€*ŒK‹[vÿï»OŠ¢ÚŸ_¥aQ7Êi $H ÁD8}^÷ÑŽ>Ì °ÌˆÀtü7•nŒgyÆ"Ø2à5íyé9±T‹«"š€:¼»[ @tçȸqÅàuŠ6OR.ï7#£ãJÿ× ¨Fn^Þ©Œè0Iꬻç×ÑAÙ:NrLük¢ïFF PFFÞà·€b(£[±¨È…ÉÆ2#¢Èɰø¾æ2i„`X:>‹6$ÚÿÁ—¯Õ.7 ·Ý{Èô–0)1 Jˆ‘Îß1ø¼ò¶pACÊF”—u ÂÔÇàß™Ÿêð&½Æ.<ÿ¢ß§?—¯3ö‡ÀCD&Y¦1u L´%ÞÊQ¬Ô”?ñ¢Êm àft —,Ùb>p\õëCØù 8¸4× ôß4ÊN¿÷“üØ @ ˆ0Që59ªð‚Ü8Þb Œ}îÔÁ¨¤±®‰\g/dtñ穯ã÷»'dÀ»x…òu.5ª{D÷)À’LæZ,F3P¯Ê´(Jy‘Šš.ûpòƒä“…yÀ€  ±>`бª­ëï ± D/ L~YÀ1€³t˜Ü&€ð™–À¢†ÜÒ;ckði=Ã@/D†¸Ò$‚}0öh ÀlçS?ï6‚¿ÒS÷À¿ˆ¯à-TÔtù]´ÛÅX¡ð8ΰ ä¬lŸßêO+ÏÆ©þ7fƒ"_EùŽ€•ÿåcF·ê#vÌå,"Ý‚•ˆ&€;/œÏVP†øó§ÍäávŸÀ (ªÑä ÖPø7Ç@i*.j±æˆèL8YE €@À6Àw70{¤-š'ý‡]¬¸|( ªÃ2n²9ØÃo”´öŠdÂiº¨VG¶x˜ãG¶aN5[GÍ€xª €O –Xð]‡EQN˜a1Z‚°Sx‹ÓqÀ‚ÐŽøk:òGÚ8+Ÿ¥›Ão«nívêò‡^ØÏAY·å*› ”¦Ð1þùlÝ@¬ ÈK<ï F@EÀ¹E‰{r’-Ù!Ìš‡¢’çr8ðÜ™¢’µOï;ÔÏB‹:5m»ƒsþ°cjû\į–Ó$¸º€Ø Lô0 lR§vp_Å»9bä€)Ù¢DPæcž‘¤§!D -¬é¢™¿—¤šOþŸ½s «ˆâøÁGõÃΖcjçIýЪØ`-Š‚5A«1ø.–¶>bÀíÖ‚Á¨!Û«h° õ*­ñFkWŠ„\¥—X4JÓŒÕjE›]CýP U4¸õqÏ™Lfïvü´ %BîänîÜ…óãÌÜsæÌõ±FlZpŸ€…ãmèœðEß  €Í?ºHNßq%íI¸Ûõf˜œ|kHÍÖÄ¢©øÐEï)¥‚Z²Cb¥Ø±FÀîbaU­W€Ë‚i½“=yþGò¹ÿó‹Š.dá/×SÀ)¾SÚwà)9ç«\9 ëKaž?‹°|¥ $µ‚ˆ0 cŒJ`ñ6èThB èœs’„«ð XY0Ùt©`Ê*à ‹€§@,wà©õ®8À™¶ü: Kî5dºã§º˜}U d7¡ˆÀ ¯Ø²UÞ{*À˜U´—pˆóÐàdißÀN(€î>7˜€¾.'[VL iSx|k Pz€7¿Ï”ס¶X¹d£ñርeu<¼ïf;¹=ÀëÀW[LaÈ5ð}ý·µ€·ø¹Ó À"jñÉlá„"G„ÚkÖ@ê·g¯$2Zq°; °*j4+ªêÉN &9óò.ˆ€å¶òbŽN2J ³ {yâfxCw³À@=ðEå,¤ïÂà‰¶ZÀJ1ƒÙݱÂ0°ZzÞ¤߆¯už…ìÄ-Å6¨›b eéMiØ7ÞS¢˜( 3Œì"cZ—^¥o`‡=ë|6 ´ñ…´Ÿ€óƒ^¤=Îfþ´ö°ò}«, À<²³,°™x 7d/À>qÀã®W`{iÿéDùô$ 
Û¯ecIè½JsC9If"àŸ:ֵīŒ› (swü­àKö4‰ƒB+»b Ç}†tÊtUÌ®.¿eƒðEk6ûW^DK_êÁ=WG©c«Q®Û‡è5¿¬RKoDìº;€ú_2*t¢Uóe*†©/“yâWâÑ*?:ĤV \ ã9DùÌ$À§-¾æE<øXÇpìrún{–ŒÛ­[JrLÈ]ý¥¡!?‘Ó˜-uµàxK…>`€ôvc÷ý«æ¯&§: $ñ;Þ£¸1‚¸¦ñÜQýçGˆpó|woZIý/ñZn:жëéÿR±¹v‹‡Uþ‚%QŠË›¼Å~.)í€ø¶«D±¡ƒ÷˜±úÎÒÇË^êJ S·BþXSÄ Øê¹«±ÅsT¤Ö‡¼äW Ú-b¦p­iàŽÎ€éóÎl3öÇDrª<±¿Dw¶ò‘ïß •:êúãmfošüùÄD~[CŸŸ ®tW/ÆxÛ•Pâ~jÖñÙ9*Ô@½®K~Yš5¬!nqƒåÍœÇÐ(À¤ÂÁ0;óúæ‚‹(ŸkšÐK$eÈÒÖí¿•òærðº¬/ííX»š–ýV>·››&îÊ•võ{÷ÚÄpüôbS^Iuÿ\µTij¡‚/=ò£‡x¶‚ѧ€‚ô`,ï‚[l¤cá×ä$Ha/‚Ø-ñâŠBƒ×5Kò àºÙÙ¦lÆyßKÒ’tæða(¼aûO)ÿ@‰Å+`Bâ¯Ë®o¤l ²WÙÆÜÙ€óF®à„B\-¡«¸ù <õÖE}Ü[Ž.¹ €“¥™º €“Fô®³8Q½ï.®¹·uÝÀ‰—5Ÿ˜Ä1Žp €càÇ8À1Žp @´ïn˜ü‚lÏ,ŸÜÁ¡‹x›Q•|ÌÚʯ9ÌÑJ{ý­mþgã ¿k·àæhy£­ylX@<˜ê SRKYÐí± ã \AT‹E(Oõö0çõ´P² €lAo}&ÁÓ02ù¦êHgàS1Ö ·×˜§U€Q«ˆs| ÛEýà=ãtdŸ¼YMÐø¹ õ͘`ØnS¼Ç<8d j9èŽj~Þê`=€Wt™_ >a¦šº;Ö£t<A;Éžœ€ÎÒâ*0]옆7–ÀÒ´8â[¸C_Ǿ‰ÙºÝ€3@ΆÛhؘî.-3^Ncœ˜Øš³ Àÿ4¬Ý ö@¶ºšPÔ0ckภ/€hØx@K/˜(Ó`#”5´ @N¬€aO €¨  Lp)Ê6GéÀzÁðÑÐ-§ìÒî814°êâ . @^ÇaÐüêÜ  |LL¨þ”Ÿ €^@ÏÐï{ˆäi0*a{”J þxQZŠJy¯Ÿ@%þL^¨!;ªgñÙ…¸øjÝøñ7sgUuà3`Ó*{“ÞdëÝh;ãÔ(ˆ¢3dÊþP[Jª™¶£0,ŽøIÅvtF,µ«36 Ä?¶ƒÓ›HPH#[Ô¦¶JB .¹C QÌMˆÖÖIAEJchû~ݽûúgï½Ù=çžóžó>ç=ç¾÷ä¾aŒ´mÛCK]0¤ûõB»÷üò‹$NDËŒmÓÛýÏDÔAx©¹çõ>ÿ‡g_t5D³ÇœÕ¾Ñû¨Žp þvAÖÏ¡\¨ó¶çÿ¦OFÇÀÞÞhOlv:²%jÁ'˳L÷´'®&Iž¶Ô9 ÌmèÇ+HøOLãÖè¹ÔЮ¿ §Sð~Ε^ÿlï3/ÚL<¾( ïš±ûëÚ‹Úø–C_ãSJ} •‘5"ÙïØCßÔY´Ÿ.ªC)䥻¿k¼ ,€„ýèPxâhÀ@uYDÙŠ½ú@¥Eß½pRGùŽßÒ Ï(ßGÃé »Äà );àSãŽi³”Í™&SˆÖ¹¸^â>…)6WQ)ì½.‘¡·³h(ùyŒYç‡OŠdßÅuÐ;Œ_†6HP’.,2 yz).\ŒW¤py±C…‹æ[Õ‡çÉ·r›¥FÂz§T6î‹Å;Ÿ™$áåÇ+ë½quK¬me•–ðǰwbN€®Ú"wÝj5¨³w]æ3¾8_DKG|Mì*R¢ À‹XçÑn¥Ái®ÉSÄ€¾ÏÊ€Wsn:³Ü„Ömm–†'³™:ÍŒ(ÏÆR.†-<ÐDiï&àBÄñR m z{_Ãê¯@} A-€,€Šbâš(ņ~‰ ÙNL |E+åjÀÀ DÏ2c«ÒÀvxåô&ºrôõ@G—–(·¦¤TˆÇ´¾ƒ´¸œ™—5À‹1¶Ý@-v­?BŸÏZ<è/[x)iâHÃáçÐß& ã1ñ©Ò±ô) +€W蕾kéeÑíPÚ©ñçþ ÂÝ`ßÛs»T­oBvˆšf3q8úfÏVÈj‚$-<`•¨Îż~ñÆ”Ú2r'ŒÈ2‹¸A«éÉÔÒ/í’c¿˜H°f÷hþò3´iÄ2µ.`Ì×x«LÆ6À!É–«a“MÅ¿µ =딚C^rì¿rËSX`ÚC¶|¬ˆØv ©B 8`€t.8Ž’*¨Óüu–÷¡Ö* ¹†"ˆr›(s2ÆÚ$’z”Ž+«RÚHˆ”íƒZ‚<||%®PïÑe4IO!X+lƒ'$ï+ŸtÆbùÅ™JÀcñ¶3>h“°l?€©T Ž0–‹b·9‹@DÞZ‰òJzJ¶Ï¡Ø4”3@› €/o‹ —‹y5lÐJI\ŠrØ/k‚˜D…WÃÑÁL VÕ-å>¨'³~ U©™ Ððùn$¯¥…ðÄuïeJËkmð U­w’¢Œ§u“2\ª(µ äá|¿´Â_üË¥cá.†÷é¯>÷â•rgC%rûWpû·“³°‹rЃ À Õ'<¢r™™f…mšÏQ«*=fÁ0FÏb§¤X¾rlåBA–ûRè$ 
AÙÎ\ÄVš/*§hm¾˜:΋źD€d^€xÆ•€>´:”¨”Ó‰PGpJ´°IÇÔ†šø•¾Ca½™Ò;æ˜ xî{xÍ‚¹p*Ww™ò¨¥õØL*×à@‰Ó‚?–ÎIÐ v3{ Ͷã1n~%~6Ñ3]Ÿ…tøtŽd`T&Åã܇¬œê$lÛN Vî`qAyŠe ù¾Èü‰º^BƒT2PëhVGPÒ¾ŠùÚ÷P"^Û®¡O€»"+ë5²YdZS`$B™.9¬![>ÈV!–Ïo¯^ý“ˆýOh6c¿”ß÷»YØ÷cÔ|ålÄ(O`ÃêÕ?·’X¨æ‰MM È\Nî•b¼JÕ \øt";ï[¦§ œæ˜xªO xõêDz0éÆœâ;à”@'®šdÆ ÌE+Dž‹F³îŒU½Ô>ýÄé¸óª='F~6Í€ŸðÅ-Àui‹À Ðè¾Dzt8#­2“°rÊ €´¡- ½–Úá?_—ê ü“•¿¥ð ™é4c¢TWß,ã îÍ‚{Ð.`çN£&N†§Îç€ x,’¤¤y¼¥¹‚;’ðGTÉñ°ÔœÌäC˜e΀-ÑtºiY0NÀ8‘‹U‰)^æT$n±Ü9Ô'@MSc÷‡ ìY€{7䣸;eÓÑmT› ßI‹—ç¹ ž™‹+1ò½åpQŸÉ‘€J'ÇlOÚZ:e˜QÖ€.${œˆ{#Sà;xüÚéd8.–Òúë ÌA@§U`åܸnGn™€öóê­ÞüèRš‰*¾(_4€9Yù[ÙèTêAɳ2Ò†â[ÒðQÒUz•ù gÀM¼T©ÁTÄÇ=.°§Ûî@]ÖwºäͰ/ûm [-í^³"?ŒSjw^µ¦àŸq Š/q2äÀFÙØ)m¨I@ÂêFÕT:¥y¼P¨ñ8P`O_2å!hЬ–Aò–©ç ‹yä (_ûä<ü”€Žkw›®˜7•8‰ç€½¿© ­©4)eÂ0߆ÿ{ ðdðøcgP!Yé’IúK4,aV´ÊžÌ ÷û¸I„óÀØdÛöáprœ¼Î]~-7$ùäìÔ"šiÈß­åáLûf€tÎ ÀvgXšaH£ WMÓ…eH§§Ù·w¬tDü.ÃzN„ñüèûƈ–êvo¿šöZÏ €AÒà,ƒGô €&Ê@VÖ:tte¸ ÜCn o+u¨ ,€l—øÚÛŽJMÏ€ø<.Ë_ŒünÙ¸T± Ò‹r@⽟‰/ž‘颿 `¯ÒZ³ðFFGðä#"ó±…äãhøRúy2€‚]3ó K æ SÕ¹ž@±¢FOfhšU†3pZšWª£¤j*;_Ïí Tõ"_:•Ø$ñ…S¨”:Pp VMÊk©Óyét)ÍyÈŠÒ“ÈÀ~u,Ê J 8ô:#¬ü³pfh%ÎÚìÎè ü«xî[3ÀuØu@™’¦x4€šD¼‹ÁÝœ ÀB®£B FLU'Å]—Uւ什R¥Ü_AØìo8šχ~§ñWfF|-$ȼ¤(+ͲÑ̨H€OÇ .}W*Ò¾3;âîo·Ô¡t KèÛ’½L wÐc,‹eJ€ hE¡¬:•ƒb©ƒ…µXl²Àd±×Ä(2Ý•Áíì£ÛBÓ?ÈÌÈ¥¤œs‡AX¢¥4þÿöìæÄ:–Þ“Pî;ÔÅLL •<@|ɪtžŸ†MT‘]#“ªÀêš+J®ç6L‰usðâ£l9äÒâä¡1E-TïJêz"[›"ùhˆHÙ.4s7체ߦ°ƒ›{™š, îP¢3c¹âhËa̪>x¿ƒN ñ±.‹ñ×}íEb¬³±;H ÚÕçs9g6‚¬:Ù ÷}¡Äè£&×@Ö$ÈòV©·ÃÀ™¶Tª–RDë7íc8* Ó—‹Å@ ršg387ʶ¡Í–¹©)Ñw`Ä’—$]É7ç^lߘlç(›ÒÉv±‹6„êÛË'NùÒ,ÏI¨`ãÚ³<6NrbÙL< }‹E%ãá…4!pÔc}"™q ‹].bß¾¾ Ò<5ÿkïþ]¢8€¿É)Q¾SãÝWAê?àà$ÎE¨£éâPh'ÁAóW¸h¡8)ué.B-µþ N…Ó¤X0ÙîQ¼Ïz.ðåîñ /S1%L¬5uñõ{Ú†8¬+d¼FÚ!´¼¿õÈ*ÒÛö…Z›õc8oTð‡ŽÞ5äø&ݽ~îäxYtÂÉŽ—f΃ê_N =IO~$€Ÿ¿mEµEƒ¬ü ÀNöömL~ÖüØ¥ì¢9eð”À§°jwfO€4‘8«`UþT2yƒY¶HÅ%Ý cäÅî{ô¨l̺ ·öÓu®Õ<ÑpéfÙxUr^Ã˃x¨ÀªD䯋6¦ê¯ï¨^¢G · ZåkrïwºàÅýuL“/¦Uc©¾óIEND®B`‚sleef-3.3.1/doc/html/sleeflogo3.svg000066400000000000000000012327051333715643700171360ustar00rootroot00000000000000 image/svg+xml sleef-3.3.1/doc/html/texlike.css000066400000000000000000000125011333715643700165170ustar00rootroot00000000000000body { margin-left: 1.5cm; border-left: 0.0cm; padding-left: 0.0cm; margin-right: 1.5cm; 
border-right: 0.0cm; padding-right: 0.0cm; margin-top: 1.0cm; padding-top: 0.1cm; margin-bottom: 0.5cm; padding-bottom: 0.1cm; font-size:12.5pt; } h1 { font-family: arial, sansserif; font-weight: bold; font-style: italic; font-size:1.8em; margin-top: 0.8cm; margin-left: 0.0cm; } h2 { font-family: arial, sansserif; font-weight: bold; font-style: normal; font-size:1.6em; margin-top: 1.5cm; margin-bottom: 0.5cm; margin-left: 0.0cm; } h3 { font-family: arial, sansserif; font-weight: bold; font-style: normal; font-size:1.2em; margin-top: 0.9cm; margin-bottom: 0.5cm; margin-left: 0.0cm; } h4 { font-family: arial, sansserif; font-weight: bold; font-style: normal; margin-top: 0.7cm; margin-left: 0.0cm; margin-bottom: 0.2cm; padding-bottom: 0.0cm; } p { font-family: "Times New Roman", times, serif; font-weight: normal; font-style: normal; margin-top: 0.0cm; padding-top: 0.0cm; margin-bottom: 0.0cm; padding-bottom: 0.0cm; text-indent:16pt; margin-left: 0.0cm; } p.noindent { text-indent:0pt; } span.normal { font-family: "Times New Roman", times, serif; font-weight: normal; font-style: normal; } ul { list-style-type: disc; font-family: "Times New Roman", times, serif; font-weight: normal; font-style: normal; margin-top: 0.0cm; padding-top: 0.0cm; margin-bottom: 0.0cm; padding-bottom: 0.0cm; margin-left: 0.8cm; padding-left: 0.0cm; } ul.circle { list-style-type: circle; } ul.square { list-style-type: square; } ul.none { list-style-type: none; margin-left: 0.0cm; } ol { font-family: "Times New Roman", serif; font-weight: normal; font-style: normal; margin-left: 0.8cm; padding-left: 0.0cm; } a:link { margin-left: 0cm; color: black; text-decoration: none; } a:visited { margin-left: 0cm; color: black; text-decoration: none; } a:hover { margin-left: 0cm; color: black; text-decoration: underline; } a:article { margin-left: 0cm; color: black; text-decoration: none; } a.underlined:link { text-decoration: underline; } a.nothing:hover { text-decoration: none; } i { font-family: "Times 
New Roman", times, serif; font-weight: normal; } b { font-family: arial, sansserif; font-weight: normal; } hr { margin-top: 0.8cm; margin-bottom: 0.5cm; padding-top: 0cm; padding-bottom: 0cm; } // table { margin-left:auto; margin-right:auto; } td.caption { font-family: times, serif; color: black; } td { font-family: times, serif; } table.lt { border-collapse: collapse; border-style: none; } td.lt- { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-width: 1px; border-style: none; padding-left=0.2cm; padding-right=0.2cm; } td.lt-r { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-right-style: solid; border-width: 1px; border-color: black; } td.lt-l { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-left-style: solid; border-width: 1px; border-color: black; } td.lt-lr { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-right-style: solid; border-left-style: solid; border-width: 1px; border-color: black; } td.lt-b { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-width: 1px; border-color: black; } td.lt-hl { margin: 0px; border-style: none; border-bottom-style: solid; border-width: 1px; border-color: black; height: 0px; } td.lt-bl { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-left-style: solid; border-width: 1px; border-color: black; } td.lt-br { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-right-style: solid; border-width: 1px; border-color: black; } td.lt-blr { margin: 0px; padding: 4px; padding-left:0.3cm; padding-right:0.3cm; border-style: none; border-bottom-style: solid; border-left-style: solid; border-right-style: solid; border-width: 1px; border-color: black; } // 
pre.white { font-family: arial, sansserif; font-size:1.0em; font-weight: normal; background-color:white; overflow: auto; margin: 0.6cm; margin-top: 1.0cm; padding: 0.1cm; color:black; } pre.code { font-family:arial, sansserif; font-size:9pt; font-weight: normal; background-color:#fbf8ef; box-shadow: 3px 3px 3px #aaa; overflow: auto; margin: 1.0cm 1.5cm 1.0cm 1.5cm; padding: 1em 1em 2em 1.1em; counter-reset: line; color:black; } code { font-family:"Consolas", arial, sansserif; font-size:9pt; counter-increment:line; } code:before { content: counter(line); display: inline-block; border-right: 1px solid #c0a0a0; padding: 0 0.5em 0 0.5em; margin-right: 0.5em; min-width: 2em; text-align: right; font-size:9pt; } sleef-3.3.1/doc/html/trigdp.png000066400000000000000000001311171333715643700163440ustar00rootroot00000000000000‰PNG  IHDR óúX¬bKGDÿÿÿ ½§“ IDATxœìÝ{\”eúøñ{å0ˆÈA@"A µHW6ÑÓrCRËJM…/R•Çܶ—x®]ب¯%™YïWÌs?×sß×}’ÏI¦R©Ý]æ@ ° À›@ ° À›@ ° À›@ ° À›@ ° À›@ ° À›@ ° À›@ ° À›`—œœÜÑ9ð;#GŽB >ÜÌÇvåL·há 0 Ãœ={vÊ”)^^^NNNýúõ‹‹‹“Ú/^éãããëë;gΜêêjÝë ¦»Û¡C‡®]»v̘1NNNááá?ýôÓ‡~Ô½{÷ ¨T*ÃWÌoG“CkvÛæb:tÙ²ecÆŒ Ù»wo{ ¥é­·ÞêÓ§›››··÷¢E‹tϱÍA 0 côìÙsðàÁIII;wî\[[»{÷n//¯ö¦“––öú믟;w.;;[kïc=ÖÐÐpöìÙüüüK—.Íš5K÷:Ø­bÓ¦Mo¼ñFuuuxxø#ݺnÍÏÏïúõëÕÕÕR Üæ­ªRUüÍ7ß踖UGLKKËìÙ³'OžüÝwßmÚ´iÞ¼y:âËÊÊšššŠŠŠ‚ƒƒ5Û»uëvóæM鳿-ÊwÝÕö-HMMM.\ð÷÷BIí寥½n dÈŠ™APPP›‹)„ÉdÒjÊ”)S¦Lùõ×_7oÞüè£^½zõv¿A@'Ã=À€ŽqåÊ•E‹:uª®®®¦¦&==½´´ÔÀáôë×ïøÃâÅ‹®^½ºråÊÖ1111ñññêkwîÜÙÔÔd`Ìš5k.\¸°eË–íÛ·KÏ—Ò8yòäùóç———«TªÂÂB©$»çž{öíÛ'„hjjJMMUëëë«P(Zçúh̘1ž••UQQáëë)=ßÈÁÁA+fË–- 3fL·n݆ öÙgŸ©O!êŽ9tèPJJJvv¶\.ŠŠZºtéôéÓoܸ¡£Ï?þxÀ€‘‘‘®®®S§N•Î÷®[·®°°pðàÁ÷ßÿ½÷Þ«téÒ¥{÷îíÑ£‡æµ¸êyÙÙÙ 0 44ÔÓÓsË–-†/©Žn dÈŠ™A›‹©EïBݼyóÕW_õññqww_³fÍ®]»œÅm~ƒû÷ïß¹s§ég 0™ê·×`¥:4mÚ´6_„ Æ`€U:uêTaa¡ââÅ‹¯¼òÊO<ÑÑKÇC°VéÒ¥KÓ¦M»víZ·nÝþô§?½öÚk°t\ ° \ ° À›@ ° À›@ ° À›@ ° À›@ ° W777/Y²ÄËËK.—ÇÆÆVVV“˜˜(Ó––fÞÜ–Ëâ àõë×geeåää”””466Θ1ã¶b~óüóÏ›1q€E³ïè´¥§§/[¶,,,L‘’’ªP(h`Œ££c‡d°d–u¸ªªêâÅ‹ÒfHHˆ³³ó™3g ÉÎÎö÷÷>|øŠ+êëëÍ™<À’YÖàÚÚZ!D÷îÝÕ-nnnR£!1Ó§OŸ5k–OAAÁ¢E‹Îž=›••ÕÞXFŸÀèT*ÕwbY°«««¢¦¦FÝ¢T*¥FCbÆ'µôíÛ×ÉÉi„ 7nÜËåmŽÕØØh”ÔK&“™g 
Î:“²–±˜cuà@æˬ“š¯?Fõ®1⛲’±˜cuà@æ‹IYËXfž”Qú±¬K =<<üüüN:%m*ŠúúúðððÛBtíÚU¥R577›!m€å³¬X‘˜˜˜ššªP(ª««—.]%=Ý*;;;##CwLFFFqqqMMÍñãÇ“’’&Nœ¨uö`³,®^¾|ù£> “ÉÔ7ñîß¿çκc>ùä“ûgÏž3fÌ=zôÖ­[;fËc¾‹¶-Mg½2¾óŤ¬e,&ÅX89Çâ`k‹IYËXrRæ‹IYËXLÊBƲKNN¾ó^¬ÑÊ•+Í6÷1cƘg Î:“²–±˜cuà@æËl­ü§þ˜ä?g,¾)k‹I1Vdα˜”µŒe¾ÿ'©|ã 0Êlg€°pÆ*ß,î`LLbäÈ‘™™™þ}G'À¬Îž=ûòË/;vìúõë~~~QQQï¿ÿ¾bâĉƒ JKKÓ ž8qâþýû5[<8~üxÛ|ðÁ/¿üR+Õ«WôÑGååår¹üþûïOMM ÖŠyùå—?ûì³óçÏ»»»?òÈ#¯½öšÑ_qzòäɵk×;vìÆAAAS¦LY´h‘§§§‡X¼xqhh¨ôÙÍÍ­¦¦Fsoyy¹¿¿룎=úç?ÿùÔ©SNNN111|ðŽÃ›››_~ùåÍ›7×××O˜0!==ÝÛÛ[Ñ^{ÛžyF\ºtÇÓ½½z‰·ß6ëˆBÎ6E¥RMš4©Gyyy×®]ûôÓO‡ ¢ûçž{®AÃ<`xû¾}ûZw½oß¾_~ù%77×ÍÍmÚ´i­c._¾œ––VTT´gÏžC‡=ÿüów6om_~ùåèÑ£ûôésôèÑ«W¯îÙ³G¥R8pÀÀÃoݺeHØ´iÓÔpee¥zYžyæ™Ñ£G·Yýþë_ÿš4iÒÔ©S E^^ÞO<¡ûðõë×geeåää”””466Θ1CŠo¯½mf®~…¿übî…À€M¹|ùrIIIRR’ŸŸŸ££chhè‹/¾¨û;;;G wÝu—áí]ºtiÝáÈ‘#Ø£G€€€€€…BÑ:fÓ¦MãÆóññ‰ˆˆX°`Á·ß~Ûæ\ž|òI__ß9sæTWWKíC‡ýóŸÿ6dÈo¾ùFë@•JõÌ3ÏÌ™3'---44T.—4èµ×^“ªMÝ.[¶l̘1!!!{÷î½~ýú /¼äáá1yòäÒÒÒ6gª¾ÚÁÁAZ“–––íÛ·ÏŸßö£_y啹sç¾ôÒK&LÐ}xzzú’%K¼¼¼RRR>,­g{í6ް!={ösæL{톤׹Q6D&“=ztìØ±ûÛßÂÂÂÞÖw+æ»ï¾ë©A}FÔö/¾ø¢Í>yä‘Ó§Oçää$$$´Yܪ½óÎ;‡JIIÑj/**:räÈ›o¾éæææáᑚšúùçŸWTTH{¼¼¼¤ÊÊÊ*++5½råŠÂÏϯõpº»÷õõB\¸pa÷îÝ7nôööîÚµëš5k~þùçüü|]ëø›ôôô¹sçvíÚµõ®ššš¦¦¦;v|øá‡—.]š>~ذaNNNË—/wppøúë¯Û;\z0˜æÃ±”J¥««k{ízÓëô(€åèè×»wïÓ§Oësqqñ× ¾××vGGÇ™3gª~£.’ÕT*U}}}›¥ãªU«RSS>ÜÃÃÃÏÏïÔ©SÒ¦B¡¨¯¯o¯ÝÅéÜ(€råÊ•E‹:uª®®®¦¦&==½´´tÔ¨QÒÞæææ›T*UëÆæææ6ƒÕíºµ´´¬]»¶   ¦¦&//oþüùAAAƒ BdgggddHa‹/~ÿý÷<póæÍ_ýU«ŸàààÑ£G/\¸°¦¦¦ººzñâÅ“&M’®OÖK&“mܸqÓ¦M‹-*((¨¯¯—^ µ}ûv» Œ‰‰‰—N_»vmçÎMMMºÇ­­­Ý¾}{bb¢Ž˜gŸ}633ó‡~hll|ýõ×oݺ5~üx‡'&&¦¦¦*Šêêê¥K—FEEI%t{ímëÝ[wæÆ×§¹GBð`À¦899ݸqã©§ž*++ëÒ¥ËÀ?úè£1cÆH{7lذaÃuð?üкñƒ>Õ^»^¹¹¹6l¨ªªòòò3fLff¦½½½bÿþý%%% ׯ_OMMBH…±ÂÁÁáæÍ›Zýdee½ôÒK Éd&Løûßÿnø:Lœ8ñèÑ£kÖ¬ùãÿX__ß»wï)S¦H\6°Û-[¶¬Zµj̘1—/_öððˆŠŠš:uªîA·lÙâêê£#æ¹çž«©©yðÁëêêî¾ûî/¾øB]~·yøòåË•JedddCCCtttVV–îö¶ýþåϘLú¥Ž ’Élwî« kû¿£z×ôyÐÑŒU¾q 4À&Pl0À&Pl0ðßÛ¿ÿý÷ß/}9rdfffÇæÓšefe¸)S¦lݺµ£³øïYûúw2Àè Ξ=;eÊ///''§~ýúÅÅÅ !bbbfÏž­9vìØçž{N1qâD™L¶iÓ&õ®üQ&“¹¸¸H›'N|ñÅu ªR©^z饕+WJ›‹/V¿P×r+«­[·FFFº¸¸H¯,Rûå—_¦OŸîéééêê•››ÛúØæææ%K–xyyÉåòØØØÊÊJÃû_¹rå²eËï| ZNž<ãéééää²|ùò«W¯}Íõwss“ýÞ… 
ZrôèÑ?þñNNNîîîsçÎÕ}l{kkÈš«¿"dóÍú_ÿW¹È†£€ÕS©T“&MêÑ£G^^Þµk×>ýôÓ!C†!æÏŸÿÉ'Ÿ(•JuäÏ?ÿ|äÈ‘ùóÿÿ{ÆBBBÞ{ï=õÞ÷Þ{/44Ôðq8ÐØØ8vìXisÚ´i·u¸qݺu«Ívceåáá‘””ôúë¯kµ'&&VVVþç?ÿ©¨¨6lØÃ?ÜÜܬ³~ýú¬¬¬œœœ’’’ÆÆÆ3fÞxx¸··÷'Ÿ|rçSÐôå—_Ž=ºOŸ>G½zõêž={T*Õ <¼½ÕnMsý+++~óÌ3ÏŒ=Úßß_+þ_ÿúפI“¦NªP(òòòžxâ ÝǶ·¶†¬¹Z±ñ =Î]6÷ˆ `X½Ë—/—””$%%ùùù9::†††Jgn'NœèååõÑG©#322FŒ!•ÇBˆ?ýéOÅÅÅùùùBˆ_ýõã?Ž7|ܽ{÷Ž?^&“I›š»ÖÖÖ¾ð }úôéÖ­[XX؉'ÚkT{ûí·‡ªÞ,..¶³³+))B\¿~ý…^ òðð˜}ÜÜܼ½½-ZÔ:«Ë—/?ùä“>>>¾¾¾sæÌ©®®V÷óç?ÿ9:::,,lÈ!ß|óMëÉNœ8ññÇïÝ»·Vû¹sçüñ^½zÉåòÄÄÄË—/WTThŤ§§/Y²$,,ÌËË+%%åðáà …ÂÀþ…ÑÑÑ{÷îmÝþ_OG¥R=óÌ3sæÌIKK •Ëåƒ zíµ×¤jSG·š«ÝÞ—¢Esý[ZZ¶oß®þEŒ¦W^yeîܹ/½ôR`````à„ tÛÞÚ²æ6ˆV¯gÏžƒNJJÚ¹sç¹sçÔíwÝuW||¼úoccã‡~¨YutéÒeΜ9RÀ®]»† Ò¿ÃÇýþûïÛ;¹:kÖ¬üüüÇ×ÖÖîÞ½ÛËË«½Fµ'Ÿ|òìÙ³yyyÒææÍ›ÇŒ$„xúé§ÏŸ?âÄ‰ŠŠŠ!C†ÄÆÆ¶´´Ha»víÚ¾}{AAAxxø²e˾øâ ¥RYTTôøã·Îê±Çkhh8{öl~~þ¥K—fÍš¥ÞµwïÞmÛ¶åçç/^¼xæÌ™†/Â’%KvíÚUQQqãÆôôôQ£FùùùiTUU]¼x1""BÚ qvv>sæŒáC 2äûï¿7ât JJJæÌ™£Õ.ý.CG·êÕž>}ºŽ/E¯;vØÛÛO›6M«ýÖ­[ß~û­££ãÝwßݽ{÷Q£F}ûí·:Žmomï|Í;+ `X=™LvôèѱcÇþío xûí·¥]qqqÒ¹Ö={öܺuK«2üŸÿùŸ­[·Þ¼y3###!!á¶Æ½víš««këö²²²O?ý4##£wïÞ2™lÀ€ÁÁÁm6jÕ£G˜˜˜>ø@¡R©>üðÃyóæ !.\¸°{÷î7z{{wíÚuÍš5?ÿü³tÖZïëë+„èÒ¥‹J¥úþûïkjjärù}÷ݧ•UQQÑ‘#GÞ|óM777ÔÔÔÏ?ÿ\}¶6!!A*Èy䑲²2Ý·Œjºÿþû[ZZzõêåââ²wïÞ÷Þ{O}J\R[[+„èÞ½»ºÅÍÍMj4«««ú4¬Q¦såÊ!„V¡nH·êÕÖý¥è•žž>wîÜ®]»jµ×ÔÔ455íØ±ãÃ?¼téÒäÉ“zè¡‹/¶wl{k{çkÞYQ 3pww_µjÕ¿ÿýo¥R™œœœ””ôÅ_!zõê5yò䌌 !DFFÆÌ™3œœ4ìÛ·ïСC×­[÷ã?>ú裷5h=Ú,*JJJìííûöí«·QËܹs·mÛvëÖ­C‡)•J)Ÿââb™L=hРAƒ <ØÙÙY]’©«¸>}úìØ±cÓ¦M~~~øÃ>ûì3­ÎËËËíííÕ—¥MOOO郴>uuu†¬@KKËøñãýýý¯\¹R__¿páÂÈÈÈ_~ùE3FúAMMºE©T¶ù‹ƒöÔÖÖº»»q:Rm¬UXÒ­zµÛûR>þøcõCª4o>×ôý÷ßçææ¶yý³\.—ÉdñññÆ srrZ¾|¹ƒƒÃ×_ÝÞ±í­í¯ygE €NÅÑÑ1..®wïÞ§OŸ–Z²²²NŸ>““ÓfÕ‘°fÍšÙ³g;88ÜÖXÆ ûé§ŸZ·555émÔíàà°o߾͛7Ϙ1C*Þ¤Ûb¿ù曳¿©¬¬|ðÁ¥C4O·N™2嫯¾ªªªš3gΣ>ªUõ455©;,ep[SÖR]]]ZZúÜsÏIÏRNJJjllüî»ï4c<<<üüüN:%m*ŠúúúððpÃGÉÏÏ6l˜VãL'$$$((hË–-Zí*•Jw·êÕnïK™9s¦ê7nnnm޾qãÆqãÆµù«''§ª7¥á4¿b­cÛ[Û;_óΊVïÊ•+‹-:uêT]]]MMMzzzii©úÝ3>ø §§çÔ©Sÿð‡?„……µ><66öÀË—/o½«¹¹ù¦•J¥¹wÊ”)šgçÔ'Ož<þüòòr•JUXXXTTÔf£Övvv³gÏ~ë­·vïÞ-]ÿ,õ/‡¼víÚÎ;›šš´Ž-,,úHŠ9sæÌˆ#ºwïÞ£GaÆýóŸÿlUEEÅŒ3zöìéíí=kÖ¬+W®Híšý444!~þùg­Ä4ß%‘?{öìäÉ“ÝÝÝ]\\†¾wï^)>..nܸqÒ禦¦E‹yxx8;;ÇÄÄ\ºt©uL{ýŸ9s¦W¯^¿þúk뵺“é¨Tª'NüéOrwwwtt8pà²eˤ 
éVÇ—¢EsýU*ÕÿþïÿúúúÞºu«u¤¤¥¥eõêÕ¾¾¾...£Fú׿þ¥ûØöÖ¶½v+e¬òM¦úý/±l‡Lf»sXY×ijS½kú<èôå—_®[·îèÑ£H§;uêÔÛz05:c•o¶[R,0c•ovpssó’%K¼¼¼äry’[ IDATlll›Y×£R©ÆŽ+“ÉÔ¯JKLL”iHKK3ÇLÖ Ã àõë×geeåää”””466Θ1ãvcÞ~ûm{{{­C~óüóÏ›p«¢]@šMzzú²eˤ§ð¥¤¤„††* ­ç’éˆ)**úûßÿ¾oß>­çøÙÙÙ9::šqëÐ1g€«ªª.^¼!m†„„8;;Ÿ9sÆÀ•J·nÝ:­ž³³³ýýý‡¾bÅŠúúzÓO`:¦®­­BtïÞ]Ýâææ&5óöÛowëÖ­õUÓÓ§Oß½{÷‘#GV­ZµcÇŽ¹sçêNCö{ÉÉÉR»ú…ÑBˆÂÂB͈kîÒü\WW'=°žèèèÁè=è`E³ z z z0°‡ääd­bMIÇ< ¹ªªÊÓÓó»ï¾9r¤Ô"—Ë?øàƒÇ{LoLDDÄÿøÇ“'Oúùù]ºtÉ××÷ôéÓC‡Õâ믿ž0aBmm­\.o3ž °p<‰u?ÚÃÃÃÏÏO]å+ŠúúúððpCbŽ;võêÕððpOOÏÁƒ !ÆŽ»bÅ ­!ºvíªR©š››M?€è°³ kÖ¬Ù´iÓ_|áåå5wîÜšššÃ‡ !²³³¯]»–Ð^L}}}uuµÔÉ•+W†¾ÿþ#FtïÞ=###::ÚÝݽ  à™gžñõõýüóÏÛK€3À Ç`$Æ*ß:ì)ÐË—/W*•‘‘‘ ÑÑÑYYYRûþýûKJJ¤¸ÍgggggçÿŸ½½½¢gϞҭŸ|òÉ+¯¼R[[ëë룾§Û= Ê`€…ã 0ë¾3£Ø `€M Ø `€M Ø `€M Ø `€M Ø `€M Ø `€M°ïèæ&›¯?Fõ®éó0/Îl0À&Pl0À&Pl0À&Pl0À&Pl0À&Pl0À&Pl0À&Pl0À&X\ÜÜܼdÉ///¹\Yy»1*•jìØ±2™,//Ï\Y,ÅÀëׯÏÊÊÊÉÉ)))illœ1cÆíƼýöÛöööæÊ`,®NOO_²dIXX˜——WJJÊáÇ …á1EEEÿûßÓÒÒÌž8À¢YV\UUuñâňˆi3$$ÄÙÙùÌ™3ƨTª¸¸¸uëÖyxx2œì÷’““¥öS§N©c ëêêÔ›š»4?×ÕÕ¶¹‹èèÁÒz0…ϦzÐÁŠfAô@ô@ô``ÉÉÉZÅš0™J¥2V_w®¸¸888ø§Ÿ~ ‘ZüüüV®\oH̆ 8ðé§Ÿ^ºtÉ××÷ôéÓC‡mo,™Ì²æf#›¯?Fõ®éó€>|SHŒU¾YÖ½²®®®Bˆššu‹R©”õƯ_¿þäÉ“æJ`M,ëh???õéo…BQ__nH̱cÇ®^½îéé9xð`!ÄØ±cW¬Xaæ)¶ì×&QwS×\u€dYg€…‰‰‰©©©ãÇ÷òòZºtiTTÔÀ…ÙÙÙ×®]KHHh/& `̘1R'W®\>|xVVÖˆ#:p.€­q\ ?†Ë5a"\- ?Õa"W/_¾\©TFFF644DGGgeeIíû÷ï/))‘ à6cœ¥`é5H={öìÞ½{Í`Yl÷AP<ËŠèý ¿ÿ³ü²ÖZtÊoŠI@gÂ@h1VùfY÷`"À›@ ° À›@ ° À›@ ° À›@ ° À›`ßÑ €E“Í×£z×ôyàŽq`(€6`(€6`(€6`(€6`(€6áö àŒŒŒñãÇ›(Lçö à=z™&LÈþ¶¢§OŸ>}út¥€éè9|âĉ¢¢"õfQQÑÉ“'MœƧ§Ž‹‹kjjRo655ÅÇÇ›8%ŒOO\RRÒ¿õfÿþý‹‹‹MœƧ§öôô,//Wo–••¹»»›8%ŒOO3wîÜóçÏ«Tª¢¢¢yóæÅÄĘ'3ŒHO¼zõj—~ýúuéÒ¥o߾ݺu[»v­Qnnn^²d‰———\.­¬¬4|øðZ[[+„èÞ½»ºÅÍÍMj40æ‘G9}útNNNBB°aäÆéÓ§ïÞ½ûÈ‘#«V­Ú±cÇܹsu§!û½ääd©ýÔ©Sê˜Âºº:õ¦æ.ÍÏuuu………mî¢cõ ƒÍ¢Ó÷` ŸÅ1# œÅ³„YÞƒ!3²üYüߦÍ‚èèá¿èA+š=ÜnÉÉÉZÅš0™J¥Ò±ÛÃ㢢¢k×®ƒ :{ö¬¢[·nš‰þwªªª<==¿ûî;õÅÌr¹üƒ>xì±Çn+FñóÏ?:´¬¬ÌÃÃC³ý믿ž0aBmm­\.o3™LÏÜa9dóõ¨Þ5KÐGï7%¬ð˲¾Ip ´Ì€K -kR°¾oÊrR`~B‹±Ê7=g€år¹R©To–••y{{ßù¨~~~ê*_¡PÔ×ׇ‡‡ßnŒB¥RÕ××WTThµwíÚU¥R577ßy¶€N@ÿ{€ããã/\¸ „¸|ùò‚ 4¹|'SSS EuuõÒ¥K£¢¢¤'`egggddèˆiiiY»vmAAAMMM^^Þüù󃂂 $„ÈÈÈ(..®©©9~üxRRÒĉ]]]’-ÀÚé)€×®]kggPXXèããÓµk×W_}Õ(/_¾üÑGŒŒ 
ÉdYYYRûþýûÕOun/&77÷ðòòzøá‡{õêõÕW_ÙÛÛ !>ùä“ûgÏž3fÌ=zôÖ­[’* 0èBê .÷îÝ;00Ð 9™÷[î¶òvë›÷·I€µà ´«|Óÿ`!„¿¿¿¿¿]]]}}½³³ó €™é¹zÑ¢E¹¹¹BˆÏ>ûÌÓÓÓÝÝý‹/¾0Kb“žøã?BˆÞ½{oܸÑ,‰ÐF†Óÿ¤´ù+¢ç)ÐtÀ›@ ° À'Ož¬¨¨0u6˜ˆžøêÕ«=ôP¯^½FŒÑ«W¯‡~¸ººÚ<™`Dz à… ¶´´äçç744üøãÍÍÍ .4Of‘ž× 8pàÇôððB„††nݺ5,,Ì,‰`LzΫT*{ûÿ+’íííU*•‰SÀøôÀ<ðÀ¼yóÊËË…åååóæÍ{àÌ’Ƥ§~ã7®^½Ø¥K—ÀÀÀªªª´´4ód€éº¸±±ñ›o¾9räHQQQyyy@@@pp°Ù2Àˆtîҥˊ+„ÁÁÁQQQT¿ë¥«–ÉdÞÞÞ—.]2[6˜ˆž× Ýwß}÷ß|||¯^½d2™Ô8sæLÓ'€1é)€÷ïß/—Ë·oß®ÙH °:z ༼<óä€Iéy Ò‰'ŠŠŠÔ›EEE'Ož4qJŸž3Àqqq»wïVo655ÅÇÇÿç?ÿ1qVtv±±úczî1}Ø=pIIIÿþýÕ›ýû÷/..6qJ¸#²ùzTïš%°0z.öôô,//Wo–••¹»»›8%ŒOO3wîÜóçÏ«Tª¢¢¢yóæÅÄĘ'3ŒHO¼zõj—~ýúuéÒ¥o߾ݺu[»v­y2ÀˆôÀ®®®ÿøÇ?JKK:TZZºwïÞnݺ™4¡æææ%K–xyyÉåòØØØÊÊJÃcV¯^=`À'''OOÏØØXÍçWlœžX8zôèÀÀ@Sg#„X¿~}VVVNNNIIIccãŒ3 ‰ŽŽÞ·oß/¿ü’››ëææ6mÚ43$ ° m?:33sÔ¨Q¡¡¡™™™­÷ÆÇÇ›.¡ôôôeË–……… !RRRBCC ÅÀ ‰9r¤àêê°sçNÓå °.mŸÎÌÌüé§Ÿ¤­™.›ªªª‹/FDDH›!!!ÎÎÎgΜ1<&++ËÇÇÇÅÅeݺu+W®Ô=œì÷’““¥öS§N©c ëêêÔ›š»4?×ÕÕ¶¹«£zÐÁŠf¡¹ËÀYø,:}²YÜnÚºYéw¡ƒ%̸ߦÎâÎYÂ,èèÁBzÐÁŠfÑéÿMK†ôœœ¬U¬ #‘©T*cõu犋‹ƒƒƒúé§©ÅÏÏoåÊ•šçœuÇ444(•ÊÒÒÒ-[¶L:uܸqí%“YÖÜ¥S¾¸SNªSÒûM Kû²bcõ†ÈzîÑä,üجNù/%ëû©3VùÖö%ÐÅÕÕUQSS£nQ*•R£1NNNNNN¾¾¾C‡-++óðð0Gê˦§V©TŸ~úinn®æéé´´4eãáááççwêÔ)én^…BQ__~»1RæõõõÀ¡÷)Ð ,ˆ‹‹;wîÜu &M(11155U¡PTWW/]º4**JzVvvvFF†Ž˜–––µk×ÔÔÔäååÍŸ??((hРA&Í`-ôœÞºuknnnÿþýÍ“bùòåJ¥222²¡¡!:::++Kjß¿IIIBB‚Ž˜ÜÜÜ 6TUUyyy3&33ÓÞÞ²®ñt=õ¡›››———yR‘ØÙÙ¥¤¤¤¤¤hµk>}ºÍ˜»îºkÏý ,vÌLÏ%ЫV­Z´h‘R©4O6˜ˆž8""âÈ‘#=zôpÑ`žÌ0"=—@?ñÄ£Fz÷Ýwår¹yÀôÀ …âøñãNNNæÉÑs ôÈ‘# …yRÀtôœŽŽŽž:uêüùó{õê¥nœ9s¦‰³ÀÈôÀÙÙÙݺuÛ¶m›f#0Àêè)€óòòÌ“&¥ç`: `€M Ø `€MÐó,Ieee]]z³_¿~&Ë“ÐS9rdöìÙeeeš*•Ê”)ð{±±úczî1}Àºé¹zÁ‚kÖ¬©®®nÐ`žÌ0"ý—@Ïš5Ë y`RzÎûúú^¹rÅ<©`:zÎGDDDEE%$$xzzªgΜiâ¬02=ðçŸÞµk×Í›7k6R¬Žž8//Ï~ü¸Ö¬÷l0À&Pl0À&è/€[ZZþóŸÿìß¿_ÑÔÔÔÜÜlú¬02=piié=÷Ü9{öl!Ä®]»âââÌ’Ƥ§^°`Áĉ•Je=„ÑÑч6ÊÀÍÍÍK–,ñòò’Ëå±±±•••†Ç¼üòËC† qvvö÷÷öÙgkkk¥öÄÄD™†´´4£¤ èÚ~°Úwß}·{÷n;;;iÓÝݽªªÊ(¯_¿>+++''ÇÛÛ{Μ93fÌÈÉÉ10æòåËiiiƒ.//Ÿ5kÖóÏ?ÿá‡J‡$&&¾ñÆÒç.]º%UB«?¦çÓçü—ôÀr¹\©TöìÙSÚ,++óöö6ÊÀéééË–- B¤¤¤„††*гiÓ&)ÀÇÇgÁ‚šgzíìì’! 3Ñs tLLL||ü… „—/_^°`Á´iÓî|Ôªªª‹/FDDH›!!!ÎÎÎgΜ¹Ý!ıcdž ¦ÞÌÎÎö÷÷>|øŠ+êëëu§!û½ääd©ýÔ©Sê˜Âºº:õ¦æ.ÍÏuuu………mîê¨t°¢Yhî2pF> ­ d ³0. 
ù.îd ­YŸ(ÿÊÈfaÜYÂ,Œþ7ËfAô@ÒƒV4‹NÿoZz0¤‡ääd­bM‰L¥RéØ][[ûôÓOïÝ»W266vË–-r¹üG-..þé§ŸBBB¤??¿•+WÆÇÇßVÌ;3œœ|òäÉ   !Ä×_íèèèããSPP°hÑ¢¡C‡feeµ—ƒL¦gîVJ6_O€ê]³äaT¶9)aió2àh™—@3)“cRícR:%þ¥[`¬òMÏ%Ю®®{öì¹páBqqqïÞ½ï|H©[!DMMºE©TJ†Ç¼ùæ›ýë_srr¤êW1nÜ8éCß¾}œœ&L˜pãÆ;/×€þ÷ !üýýï¿ÿ~cU¿B???õin…BQ__nx̪U«RSS>Û(¯JLLLMM?~¼——×Ò¥K£¢¢¤'`egg_»v-!!AGÌâÅ‹³³³<póæM™Læàà „ÈÈÈˆŽŽvww/((HJJš8q¢ÖYe€ÍÒS/X°`çÎÑÑÑF¿xùòåJ¥222²¡¡!::Z}³îþýûKJJ¤¸Í˜ëׯ§¦¦ ! $âààpóæM!Ä'Ÿ|òÊ+¯ÔÖÖúúúÆÄĨj€žxëÖ­¹¹¹ýû÷7úÀvvv))))))Zí™™™ºc\\\Ú»ûùÀFÏÐ9è¹ØÍÍÍËËË<©`:z àU«V-Z´H©Tš'LDOqäÈ‘=z¸h0Of‘ž{€Ÿxâ‰Q£F½ûM`ÕôÀ …âøñãNNNæÉÑs ôÈ‘# …yRÀtôœŽŽŽž:uêüùó{õê¥nœ9s¦‰³ÀÈôÀÙÙÙݺuÛ¶m›f#0Àêè)€óòòÌ“&¥§ð߈ÕÓséóðÚ.€333Gš™™Ùzo||¼‰³ÀÈÚ-€ÝÜÜ(€FÛðñãǵ>¦ÂÕÂÌBÏ{€§OŸ®Õm²d0=ðéÓ§µZrssM– ¦ÒîS 7oÞ,„¨««“>H …é³ÀÈÚ-€7lØ „¨©©‘>!îºë.__ß÷ßßL©`<íÀÒ¥Î/¾øbZZšóÀ$ôÜLõ èôÀtÀ›@ ° í>KSeee]]z³_¿~&Ë“ÐS9rdöìÙeeeš*•Ê”)¡}±±zöì1K`}ô\½`Á‚5kÖTWW7h0Of‘þK gÍše†<0)=g€}}}¯\¹bžT0=g€#""¢¢¢<==Õ3gÎ4qV™žøóÏ?ïÚµëæÍ›5)€VGÏ%Ðym1iBÍÍÍK–,ñòò’Ëå±±±•••†Ç¼üòËC† qvvö÷÷öÙgkkkMš*ÀŠè)€%'Ož¬¨¨0u6Bˆõë×geeåää”””466Θ1Ãð˜Ë—/§¥¥íÙ³çСCÏ?ÿ¼X=ðÕ«Wzè¡^½z1¢W¯^?üpuuµIJOO_²dIXX˜——WJJÊáÇ …1›6m7nœODDÄ‚ ¾ýö[“¦ °"z à… ¶´´äçç744üøãÍÍÍ .4]6UUU/^Œˆˆ6CBBœÏœ9s»1BˆcÇŽ 6Ìt©¬‹žøÀ[·n¬»úØ=ðo¼qõêÕÀÀÀ.]ºVUU¥¥¥™4¡ÄÄÄÔÔT…BQ]]½téÒ¨¨¨ !²³³322tÇ,^¼øý÷ß?xð`@@ÀÍ›7ýõW“¦ °"öºwûøø9r¤¨¨¨¼¼< 88ØÔ -_¾\©TFFF644DGGgeeIíû÷ï/))IHHh/æúõë©©©BˆAƒI‡888ܼyÓÔ ¬‚žXl†ÒWbgg—’’ÒúùU™™™ºc\\\¬ïž^€¹´]gffŽ5*44T³ìT‹7qVX®Nù¼lA»°››0ÀæðpfaÀÙ€j»>~ü¸Ö¬šž§@OŸ>]«%::ÚdÉ`*z àÓ§Okµäææš,L¥Ý§@oÞ¼YQWW'}( Óg€‘µ[oذAQSS#}BÜu×]¾¾¾ï¿ÿ¾™RÀxÚ-€¥K_|ñÅ´´43æ€I´[K’““•J¥V£›››Éò€NJïËuöðfÓÒS÷èÑ£u£J¥2M2˜Šž¸¼¼\ýùêÕ«)))÷ÝwŸ‰SÀøôÀþþþšŸ7oÞ•””dâ¬02=ïÖboo_SSc¢T0=g€¿úê+õç›7oîÛ·ÏÛÛÛÄ)`|z à9sæ¨?wëÖíÞ{ïݼy³IÀôÀ.\0O˜”ž{€Oœ8QTT¤Þ,**:yò¤‰SÀøôÀqqqMMMêͦ¦Y| IDAT¦øøx§€ñé)€KJJú÷ï¯Þìß¿qq±‰SÀøôÜìééY^^(m–••¹»»›>+@[bcõìÙc¦Œ8–¹è93wîÜóçÏ«Tª¢¢¢yóæÅÄĘ'3ŒHÏàÕ«WÏš5«_¿~vvvÍÍÍ111k×®5Ofl—Ù~¯ [¢§vuuýÇ?þQVVVRR¤¾ë¢çh!DKK‹R©lhh ljjjnn6CZ—ž¸´´ôž{Œœ={¶b×®]qqqfI cÒS/X°`âĉJ¥²GBˆèèèÇ›#/ŒJÏ=Àß}÷ÝîÝ»íìì¤Mww÷ªª*Óg°<® X=°\.W*•={ö”6ËÊʼ½½MŸØ"Ù|=ªwÍ’@'¥§Ž‰‰‰çw„—/_^°`Á´iÓ̒ܽW* .V²9zî^»v­]@@@aa¡O×®]_}õUód€é)€]]]÷ìÙS^^~äÈ‘’’’]»vÉår£ 
ÜÜܼdÉ///¹\YixÌÖ­[###]\\ìíw;11Q¦!--Í(©€.±±úÿƒÐSKoýõ÷÷¿ÿþû…ׯ_7ÊÀëׯÏÊÊÊÉÉ)))illœ1c†á1III¯¿þzëC~óüóÏ%U@' §?~ü¥K—Ô›ùùùF8==}É’%aaa^^^)))‡V(ÆLœ8ññÇïÝ»wëníìì£~x5z àÞ½{:ôСCBˆ÷ßÔ¨QO=õÔZUUuñâEu-âìì|æÌ™Ûi-;;Ûßßøðá+V¬¨¯¯×,û½ääd©ýÔ©Sê˜Âºº:õ¦æ.ÍÏuuu………mî2z:hö ƒ%ÌBs׳„YwFZ,áϤqÝùJen3k=̶’–ö7Kw²ùB÷ÂŒ³Œò“Ö@–öçÁÒ~NÒ=Ѓ)zÐÁŠfaøÏIãþ¤5Û¿iï|% dÎÓýÏCrr²V±ö_dÕ&™J¥Ò±y󿤤¤aÆ;wnÛ¶mQQQw>jqqqppðO?ý"µøùù­\¹2>>Þð˜þóŸS¦LijjRòõ×_;::úøø,Z´hèСYYYíå “éŸ»Å1à•›Ö÷n‡õÔót¾N9)aȼÌùlC³Mʱ:餌ó÷—IYÀX7)šõýóÏz'%¬ð_JæË'¥±Ê7=¯ABŒ1ÂÇÇçøñã“'O¾÷Þ{ï|H!„«««¢¦¦FÝ¢T*¥ÆÛŠÑ2nÜ8éCß¾}œœ&L˜pãÆ c=µ øìÝ{\”uÞ?þÏ2rP@”ƒA˜"ÒŠÚÁÄ]65+ïÒt5­­- ]­Ö\Ŭ=Õnº¦ÑA%Ãr;<¢Û‹{k›Ç½­T½m=ƒ˜ Š&r•ƒ1xýþ˜ïÎEE“ëóžy¿çõük¸ yÍËàóöãuq ¸s†"x˜k\ýÉ'ŸÜyç>úè‰'Ο?ûí·>|¸ý©!!!‘‘‘ÎÓÜEEEõõõýû÷ÿ¡_ÓÃ07ñ¸ÆàôôôÜÜÜ#F(¥6nÜ8wîÜ»îºëܹsíž:uêo¼1bĈ°°°^x!%%¥OŸ>J©5kÖÔÔÔL™2¥¯innnjjjjjRJ566*¥üüü”Rï½÷Þ=÷ÜÓµk×ÂÂÂéÓ§ßwß}mŸ1øÁÜêbW¥Ôuüº„9AìJ\æg€÷íÛçØý*¥:tèð‡?üáoû›)Á/½ôÒ¸qã’““£££-‹ó—u7mÚ´zõê¶¿æÃ?ìÔ©Ó¸qãš››;uêÔ©S§ÊÊJ¥TNNÎwÜÑ­[·I“&ýä'?ùôÓOMy© ÀUÏoݺuذa=zôPJUWWwèС¹¹ùäÉ“¦{yyeddddd´:¾|ùòk~MZZZËÛe9mÞ¼Ù”×ò\õ ð=÷Üóý÷ß;÷îÝû»ï¾SJ555¥¦¦½4ó\ãh®ý6HW„÷¡^p×ó5÷ÝwŸRê³Ï>sÉ+pS”¿.‹&€s¯ pUUÕéÓ§o¿ývLJ}ûöõ÷÷ß¿Ë ðõ|»±üòÚ_c,Óÿ:<˜{]}îÜ9¥TPPóHpp°ãàúšëdùO¯¼òŠãø×_íüš£Gž?ÞùaËOµ|Ü~çÏŸ?zô蟼ÕkhCËghÃÕZ´z íw’æµñ'y®ÿÿ…¾ï‡Vnìû¡A¦?´ä&?Y&)üd]ëÚà&ßæþd1ý~¸Îÿ¼VÚhqcß7ÐâÆ¾n¬Å üI¶ÿûá:Ûµ¿ÅõÿI¶ÿûá@²Ÿ¬6˜ò“Õþהּ×Oë÷ƒº¾ÿ›¦»þŸ‹W^y¥Õfͬ×`1 ìçj¿ªªªÐÐÐþóŸƒv±Z­~øáÃ?|ý_óÙgŸ=ôÐCv»½í,‹Å¤î×q]™¥Ûµ/÷º®3À×q]Ù5Ï6›DYê:²Ì ¢»Ь?@nÿ§ÌÊYJæÏ¯ÈR„‹e)~—@£”î,”jOmþ¦äú 7ËbYêšA&mßÜë pHHHdd¤s÷_TTT__ß¿ÿú5­¸×ï+¥¦NúÆoŒ1",,ì…^HIIqürïš5kjjj¦L™ÒÆ×477755555)¥•R~~~.mÌà·µs¯3ÀJ©—^ziܸqÉÉÉÑÑÑ‹eÕªUŽã›6mZ½zuÛ_óá‡vêÔiܸqÍÍÍ:uêÔ©See¥kj€›q»3À^^^­Ž/_¾üš_“–––––¦ý%Cnw@l€À#` `ðØ€GÀ<6Àà°àíê×ËòËk±Lÿëà g€À#` —@€+Q^×}Í,\@ ΀GÀ`¸œ,…Vp6g€À#à 0€Ép²À=á 0xl€À#` `ð¸ ÈDv"Üî€ œ€3Àpís˜ï(_3þÖ€“¥àB8 ×ö½ÝÕ¯ Ý°€ 0xl€À#` `ðØ€GpÙ¸¹¹yöìÙaaaV«uìØ±gÏž½þ¯¹Úñ©S§ZZxûí·éú€{sÙxþüù«V­Ú¶m[IIÉ÷ß?iÒ¤ëÿš6þÛ©S§6üÛ¯~õ+¢2àö¼]œ••5gΜÄÄD¥TFFFBBBQQQŸ>}®çkÚøo½¼¼üüü\QÜškÎWUU>}úöÛow|Ø·o_ÿýû÷_Ï×´ýß®Y³&**jàÀ/¿ür}}=U!pw®ÙŸ;wN)ä<ì8xͯiã¿0aBnnî—_~ù§?ýiåÊ•©©©m¿ Ëzå•WÇ¿þúkç×=zôüùóÎ[~ªåãö;þüÑ£G¯øä­^CZ>C®Ö¢Õkh¿ëü“47¨?ISÐ|?´rcßí 
2ýû¡%7ùÉ21Há'ëZÏÐ7ù~0÷'KÞ÷þ}ûœ±Òš„•ö‡’÷“Eöw7ù~ÀJ{ý¸d‰\ÕE–¢Ä(KÜK‹ààಲ2??¿#F¤¦¦8pÔ¨QÅÅŬ³D–zî¹çŠŠŠ²²²n¾ùæãÇOž<ùÖ[o]´h‘éA”Y"KõíÛwôèÑñññ‹ÅqdΜ9 ,PJ=ùä“L³D–RX”˜Qf¡T;a|pɹª‹,…E‰Q–®ÞËÔ¥KÃ0­Vkmm­aAAAܳD–ŠŽŽ>yò¤óÃ’’’˜˜A”Y"K·:§#ˆ2Kd)‹“ Ê,”j'Œ.Y"Wu‘¥°(1ÊÒAÔû»˜˜˜µk×VTT$&&ÕÕÕùúúrÏYª¦¦&**ÊùaÏž=+++uQf‰,uÛm·µ:2`ÀA”Y"K),JL‚(³Pª0>¸d‰\ÕE–¢Ä(K WïÀeZ¿~½¯¯¯Ïºuë ÃØµk×Ì™3¹g‰,•˜˜XTTäü°¨¨¨_¿~:‚(³D–2 ãøñãšžÜUY"KaQbD™…Rí„ñÁ(Käª.¯%FY:`¬K]]¾Û9¸*K^©]»v}ûí·Î¿ýöÛüü|îY"K-\¸°C‡;vüüóÏ ÃXµjÕ¢E‹tQf‰,å€E‰EeJµÆ—,‘«ºÈR%VY¦ÃÀ8~üø—_~yüøqIYÂJÝrË-§Núì³Ï~úÓŸ†qìØ±øøxîY"K€&îŸ%rUY <î­ÅàÁƒ/?˜ŸŸŸ“““‘‘‘ŸŸÏ1Kd©òòòI“&mÛ¶ÍÛÛÛn·>|ÅŠaaaf=¿K²D–JNNÞ¹sç¥K—bccOœ8aF``àùóçM¢ÌYJaQbD™…Rí„ñÁ%Käª.²%FY:à&XZ¤¥¥]ñxBBÂÕ>åþY"K͘1£K—.'NœèÙ³çÉ“'§OŸ>cÆŒO?ýÔÄú,‘¥«ªªBBB›žBœ%²”¢Ä$ˆ2 ¥Ú ãƒK–ÈU]d),JŒ²´pÙ¹g7Э[7ÇÝÛjjjÂÃùg‰,õë_ÿº_¿~o¿ývHHÈ›o¾Ù«W¯Y³fé¢ÌY L‡ñÁ%Käª.²x8\ ­{÷î]»vu|XUU•˜˜xæÌÖY"K9/¶±X,¡¡¡Ã† {öÙg}||L¢ÌY L‡ñÁ%Käª.²x8l€µHJJºüà¾}ûXg‰,õ‹_ü¢²²ò7Þˆ=~üøÌ™3»wïþÑG™D™%²0‚E‰EeJµÆ£,`‹£,ð;ÀZ¤§§;WTT¼ÿþûcÇŽåž%²Ô›o¾ùøã'$$8>5jÔ›o¾©#ˆ2Kd)`‹‹ Ê,”j'ŒFYÀ%FY:à 0…ŠŠŠG}tË–-’²$•:sæLiiittt=4EÐg +%òß5E–º%A”Y(uc0>Ü?Käª.²T+X”e™g€)„……iúµfÉ(uàÀ??¿Þ½{÷èÑ£¸¸ø›o¾¹å–[tQf‰,%òß5E–º%A”Y(õCa|pɹª‹,Õ %FYæpñM¸„jh¡¦¦æ£>JLLäž%²ÔàÁƒ7oÞlÆÚµk}||üüü–,Y¢#ˆ2Kd©VÊËËGŒAD™%¦%A”Y(ÕNŒ²Z³ª»$Hk%FY:`¬E«eˆŽŽþâ‹/¸g‰,ÔÐÐ`Æðáó³³ bbbtQf‰,u¹~ýúÑQfÉ(…E‰EeJµÆ£¬Vd¬ê® Ò—…E‰Q–¸Z‹o¾ùÆù8 <<\@–ÈR:tðóó»xñb~~þ_ÿú×   ššîY"K566¶|ü?ÿó?‹EGe–ÈR ‹“ Ê,”j'Œ.Y"Wu‘¥°(1ÊÒ`-zõê%/Kd©˜˜˜µk×VTT$&&ÕÕÕùúúrÏYªS§N-?ŒŽŽþä“OtQf‰,¥þýó[ZZZRR¢ï{2ˆ2‹¾‘ヲÆ—,‘«ºÈRŒ²´põ)hWZ¿~½¯¯¯Ïºuë ÃØµk×Ì™3¹g‰,õM eee:"è³D–2 Ãf³3F)åå奔3fŒÍfcD™EY ¸Àøà’%rUY ãƒQ–؃§«««#û¡%ËY ¸xöÙgGŒqìØ±K—.;vìî»ïž>}:ë Ê,ÊRÀÆ£,pŒ²tÀû€™zöì¹sçÎèèhLJ'NœøéOZ\\Ì7ˆ2‹²€gÂø`”¥6À`¦€€€sçÎ9ï\bF``àùóçùQfQ–ðLŒ²tÀM°´¸ï¾û®ö©72ÍY LóÍ7ßôîÝÛñá7ß|sÓM7±¢Ì¢,…ñÁ"L‡ñÁ(KœÖÂb±LŸ>½Õ­Û ÃX¼x±éàdY"K€évïÞzóÍ7;><~üxEEÅwÞÉ7ˆ2‹²Æ‹ 0Æ£,°ÖÂb±455y{ÿÇ v»ÝÞ±cGÞ&Kd)àge9—––FGGÇÆÆêx~ú Ê,š ŒAÀˆÈU]d)ŒFYæÂX ??¿šššVïfV__ÒÐÐÀ4Kd)‘˺ÈR8ƒÄ(«¼¼|Ò¤IÛ¶móöö¶ÛíÇ_±bEXX˜¹)”A”Y”¥0>X)Œ>Y"Wu‘¥0>eé€ °±±±ÿûßZ<|øðƒ>xüøq¦Y"K‰\Ö¥–Â$.Y>úèÅ‹ßzë­ž={žúhddäéÓ§³³³?ú裼¼<¾Y"Kùúú^þoßÿ½ŸŸŸ¹A”Y"KõèÑ£¸¸¸ÕÉ–âââ=z˜D™%²”RÊb±477;?lnnnuâ…]ee)ŒA 
ãƒO–ÈU]d)ŒFYZ¸z.Öš5k"""œÎkÖ¬áž%¯TLLÌáÇ[|øÞ{ï}â‰'LO¡ ¢Ì¢,e`|0 Âøà’%rUY ãƒQ–ØkÔÜÜ\PP°cÇŽ‚‚‚ææfYÂJ‰\ÖE–ÊÏÏ÷òòš>>ëÖ­3 c×®]3gÎ4=…2ˆ2‹²TK,‚0>¸d [Õ‰ƒ²0>eé€ 0…}ûö­X±âÈ‘ŒUú IDAT#’²„•’´¬Óg…‡‡ggg777ÏŸ??))I_e–°Ruuu6›MÇ3»*ˆ2‹²TKî„ñÁ(KتND…ñÁ(ËtØü?’–uú Ê,œAb”æÂøà’%rUY <6ÀÿÈe]d)gXe€‰0>e‰\ÕE–Oƒ·Aøÿíß¿¿°°pÀ€}úô“%²€ÇÂø`”n`ð\ý€ .ˆÌ1ÈÆæS8¬ÅòåËSSS½¼¼ZܸqcUUÕÏþs¦Y‹eÖ¬Y->õÔSݺu{íµ×L "ÎsÝwß}WûÔÆ9QfQ–"”3‘l|`Nð…ñÁ(Kl€µ°X, ~~~-®ZµjñâÅ;wîdše±Xzöì9eÊ”ßþö·Îƒ[·nýõ¯}ðàAƒˆ³\åÂ… ;w–—e±X¦OŸn±XZ4 cñâÅæN² Ê,âRdãƒr&ÒŒÌ)ð"GÆ£,°ÖÂb±äæævìØ±åÁ'NÌ™3Çô‹¦È²,KaaáÈ‘#_xá…çž{Îq°¬¬ìæ›o®¯¯71ˆ8K‘³ WUðÊjjjòöönyÐn·wìØÑôaOD™E\Šl|PÎDšñ9ÕNU\²DŽ*ŒFYZÜkÚã(¥¼®‚o–Rª¡¡¡¨¨¨{÷îK—.uܽ{wdd¤¹A”Yï¿ÿ¾ÝnoupÆ ÙÙÙæQf)¥fÍšÕê`jjêœ9sÌ ¢Ìr|K´:¸råÊ!C†˜D™¥”êÙ³çܹs[ܲeKbb¢¹AÄY¾¾¾õõõ­ÖÕÕùùù1 ¢Ì¢,E9>(g"Íø œ‰dãƒr&bTqÉ9ª0>eé€ °W\•¸g9ƒ<öðÃ/Z´¨oß¾Ó¦Mã›…YÅ%K)•››»þ?effZ­Vsƒ(³”R………ÑÑÑK–,qø`È!Lƒ(³(K‰D6>(ç”"”3£ŠK–¼Q…ñÁ(K WïÀeª¨¨™%ÂÅêL²®ªà“•ŸŸïåå5yòämÛ¶=ztÛ¶mO?ý´——×®]»˜QfQ–"˜SíD9>ä]¬N™…QÅ% ãƒQ–Økqþüy‘Yò`VqÉùÊ××·±±Ññ¸¸¸xܸq±±±?þ¸ÍfceÆš5k"""œÿÒ±fÍÖA”YdAdãsªèÇeF•›gIUŒ²L‡»@k!òŽy#GŽ?~ü”)S”RÿøÇ?Ö¬YãüTVV–‰A”YW|sMȲüüül6›¯¯¯Rª¤¤dÖ¬Y{÷î:thfff`` Ó¬ÊÊÊÐÐPŸÐM²»téRQQQuuu×®]ûôéÓ¡CîA”Y4A"ß2—l|PÎD²ñA91ªeI…ñÁ(Ëd®Þˤ$ÞÚ!44ôøñãŽÇ;vìèÖ­Û³Ï>;uêTßEdY¸X \U`:²ñA9ÉÆåLÄÅê\`T°€3ÀZˆ|Ë\__ßòòò   ¥Ô·ß~›’’rêÔ©ÆÆFÇ­ùL ¢Ìù>„"᪠FY÷ÝwßÕ>µqãFŽA”Y”¥D¾e.Ùø œ‰dãsª0ª¸da|0ÊÒÏ©jnw±›7oÞ»ï¾ë8¢ûŽyº³zôèQXXèx\XXØ­[7sŸß%Yééé­>õÔS/¾ø"߬‘#G¾÷Þ{ŽÇÿøÇ?¦¶`nqÖš5kæÍ›×òÈ£>ú÷¿ÿÝô ²¬½{÷ÞsÏ=ŽÇ~~~k×®õöö¶X,Ë–-37ˆ8+þßzöì¹}ûvÇã>}úlÚ´‰iee)E8ªÈ‚ÈÆåL$”3£ŠK–ÈQ…ñÁ(K —žKI¼µÃôéÓ ðÕW_mݺµwïÞ¿ÿýï ÃhhhÐñ]D–¥p±:“,%ñ}}||jkk;温¦Ÿ)Ê,§3g΄‡‡;755éË" ¢Ì""”3‘l|PÎD²ñA91ª¸dÉUŒ²LÄãU²#òŽyÕÕÕ÷ß¿Åb±X,ãÇ¿pá‚av»ý­·Þ27ˆ2 ³ŠK–’ø†7ÝtÓ?ÿùOÇãõë×0ÀÐöŠ2ËÉf³8×ÖÖúûûs¢Ì""”3‘l|PÎD²ñA91ª¸dÉUŒ²L„ 0ü06›ì¾ Y˜U\²pU£¬–ÂÃó³³›››çÏŸŸ””$ ˆ2‹²”0d£Š&ˆl|PÎDŒ*.YâGÆ£,³`¬Qeeess³¼¬†††ÊÊʦ¦&Y˜U\²pU£¬–233•R^^^ÞÞÞ999‚(³h‚ÈÆåœ2GA.Vç’…QÅ(Ë ãƒQ–Y°ÖâÈ‘#}úôQJÅÆÆ;vìã?NHH3fÌ™3gXg½óÎ; ‹E)Õ±cÇaÆåç盞B™…YÅ(K*aWU\nß¾}+V¬8r䈘 Ê,­AdãƒrN„£Š,«3Ê’Jö¨Âø`”e ¼ ’>ø`ppð3Ï<³dÉ’ï¾ûN)5~üø•+WFDD¬^½ši–ãîiiiJ©åË—Oœ8ñâÅ‹}ôÑÆ‡jbq–TçÎëСÍZeUUUuéÒ…æÖ)³ëêê‚‚‚¼½½%e›#”3‘l|`N™£ŠQFÈáê¸L]»v­®®6 
£¼¼\)uúôiÃ0NŸ>Â7+""âÀŽÇÿú׿n½õVÃ0Þ|óÍ¡C‡šDœeàbuY¸ª‚W°@6>(g"Ùø žS.Vç…QÅ+ <6ÀZtîÜùâŋƿÃÄq±Í… œ÷Iã˜å 2 £¾¾>00Ð0Œ'Nøùù™D™…YÅ%ëxì±Çòòòyä‘””””””%K–$''O˜0oÖܹs###_~ùå—_~922ræÌ™Ï>û¬Õjݾ}»¹AÄYÀýø œ‰†æñA9q±:—,Œ*FYàɰÖbàÀï¾û®aK–,¹é¦›žþù#GŽ<ÿüó?ùÉOøf 2dÁ‚†a\ºtiÞ¼ywÝu—a'NœèÒ¥‹¹A”Y˜U\²pU£,à‚l|PÎD²ñA9ÉÆåLĨ⒅Qò`¬Enn®···Õj >tèÐàÁƒ•Raaa»ví⛵sçÎÀÀÀÐÐÐÐÐЀ€ÇØØ´iÓ“O>inef—,\UÁ(K$²;²PÞú…l|PÎD²ñA9q±:—,Œ*FYòPŽújš `]Ž=ºfÍÇð¸téRii©¾ß9!Ë:}úôÇüñÇŸ:uJÇóÓgaVqÉÂUŒ²Þÿ}»ÝÞêà† ²³³™†¡”š5kV«ƒ©©©sæÌaä@6>(g"Ù¨" ÂÅê\²0ª¸dQ޲,ÊñA<ªL‡ °^Ÿ|ò‰¼,I¥0«¸d᪠FYêßo%ÚÒÊ•+‡ Â4ȑճgϹsç¶<¸eË–ÄÄD¦A-a|¸y.Vç’…QÅ%‹x|ÍD²ñá’Qe"l€õ¢¼Ï6Y–¤R˜UŒ²pU—,¥Tnnîúÿ”™™iµZ™9² £££—,Yâ¸d [Õ‰ƒ²(ÇYåø ÌÒ—@ká¸ÞùÏÃÿú׿¦OŸ¾cÇÖY"K8ƒÄ(Ëb±ôìÙsÊ”)¿ýío·nÝúë_ÿúàÁƒƒ”Raaa»wïŽUJíܹsܸq&LhnnÎÊÊ2wŒ’)Œ&AÀˆÈU]d)ÊñA–E9>(³tÀ`-8pë­·:?ìß¿ÿ¸g‰,%rYY ge)¥6mÚ4räÈ   çž{Îq$11ñÛo¿åtîܹ®]»:wïÞ½cÇŽ™™™¦ÿS7YÂø`¤0>ød‰\ÕE–R„ãƒ,‹r|Pfé€ °qqqË—/ÿå/éøpÙ²e½zõâž%²”Èe]d©-[¶|þùçοČ7núôé/¿ü²éA”Y"K9ÄÄÄlݺ5%%ÅËËkÚ´iJ©ÒÒRç°äÔ£GÂÂÂÁƒ+¥ »uëfzqÂø`¤0>ød‰\ÕE–R„ãƒ,‹r|Pfi¡ñ-–<ØW_}˜˜øÀôë×/00pÇŽܳD–JNNÞ»w¯óï¿þ:99YGe–ÈRv»Ýù¡ÝnÐD™%²”!ô-s§OŸ>`À€¯¾újëÖ­½{÷þýïoFCCƒéc”,ÈÀø`d`|ðɹª‹,E9>Ȳ(Çe–<^%GåååYYY¿ûÝï²²²***ddÉ+%rYY*))Éñ‹%ï¼ó΀tQf‰,e†¯¯occ£ãqqqñ¸qãbccüq›ÍÆ4È0Œêêêûï¿ßb±X,–ñãÇ_¸pÁ0 »ÝþÖ[o1 rÀø`„ñÁ%Käª.²åø ˢģÊt؃G¹¬‹,…3HŒ²³ÙlçÏŸ—,`|pɹª‹,%åøà;ªph-×Ä·’ŸŸŸ“““‘‘‘ŸŸÏ1Kd©íÛ·ÿ×ý×M7ÝS\\\ZZúùçŸ'''›õü.ÉYJ)UQQ‘››{êÔ©¨¨¨ŸýìgZßVž,Kd)‡ªªª.]ºtèÐAk eCccc]]]PP··Þ›ha|°R¬²D®ê"K)ÚñA™E6§ˆ³L„ °Ë—/¿ü`ZZZAAA^^^ZZÇ,‘¥”Ðe]d)ࢨ¨h̘1EEE±±±[¶lÙ¹sç‚ n¹å–¬¬¬îÝ»s rXºté;ï¼SXXhFÇŽüãÿùϾóÎ;ùa|°rÀø`”îr|Pf‘â,Óa à £¬|088ø™gžY²dÉwß}§”?~üÊ•+#""V¯^Í1H)5oÞ¼wß}×±«Y¾|ùĉ/^¼øÑGmܸqèСƒÀt"Wu‘¥(ÇYåøà>ª8­fmÆŒo¿ý¶°,¥D.ë"K]ítJBB‚égZȲD–RJååå;v¬K—.½zõêÖ­ÛéÓ§#""Æ÷£ýˆiRjéÒ¥7nt¼ïë˜1cžxâ‰ÄÅŽôÒKÛ·oçtEî„ñÁ%Käª.²åø Ë¢®Uí‡3ÀZäåå-X°àÌ™3—.]r9tèPbb¢RjÏž=L³D–Â…yŒ²€‹€€€ªª*ŸÆÆÆN:]¸pÁjµÖÕÕõèÑãܹsƒZf)¥ºwïn³ÙNž<Ù§OÇ?° RL‚Æ«,`Á%ãƒl&*ýãƒ2Kl€µèÛ·ïèÑ£ããã-‹ãÈœ9s,X ”zòÉ'™f‰,|á ’Ûf 4hòäÉS§NÍÌÌÌÈÈx衇¦M›öî»ïîÝ»÷Ë/¿ä¤”JNN3fÌ /¼`Æüùó?ûì³¼¼¼“'O&%%UWWs RL‚€5«º«‚ôeQ޲,ÊñA™¥…Kî=-^ppp«#qqqܳD–ºÜóÏ?OD™% ÔÎ;G}ûí·ú7___ǾY"K†‘››ëíímµZƒƒƒ:䏸0,,l×®]Lƒ ÃØ¹sg```hhhhhh@@ÀöíÛ ÃØ´iÓ“O>É4ÈÀø`tEî™%rUYŠr|eQŽÊ,ð;ÀZÜvÛm­Ž 
0€{–ÈRW¼†mÇŽŠêÂ<Y"K=ýôÓ£G~衇Zžlyî¹çLŒ ÏYJ)5vìØ‚‚‚ýû÷2$"""//ïôéÓÝ»w7ý=È‚”RC† ),,ܺu«Rjøðá‘‘‘J©‘#GŽ9’iÂø`¤0>ød‰\ÕE–¢dY”ãƒ2K\  æqÉêÒ¥KMMMË#½zõ:v옉ôY"Kµ”ýØci ¢Ì¢,nãƒK–ÈU]d)'ŒFYfrõ)hWÂ…y\²FŒÑêÈøñãuQf‰,ÕÙˆ¡œe"K›Ãøà’%rUYÊ ãƒQ–‰:¸në àz¸0KÖ–-[ZY³fŽ Ê,‘¥Àt\²D®ê"K‡Ã%Р…ÅB4bÈ‚(³(Kx&ŒFY&Â`Тկr ¢Ì¢,à™0>e™ˆå®à‡ÂÛ €™’’’Úøì¾}ûØQfQ–ðLŒ²tÀÌ4vìØeË–=õÔS111%%%ùË_¦NÇ7ˆ2‹²€gÂø`”¥…«n? "%''ïÝ»×ùá×_œœÌ:ˆ2‹²”auuuŸ|òÉñãÇõEQfQ–a|0ÊÒ`-Þÿ}»ÝÞêà† ²³³ùf‰,¦ hùók·ÛXQfQ–2 £´´ÔÇÇ'"""33óÒ¥K‚(³(K€‰0>e逻@k1yò䦦¦Vm6ÛÒ¥Kùf‰,¦‹‹‹[¾|¹óÃeË–õêÕ‹uee)‡€€€Ý»woÚ´iذaÇD™EY X¨¯¯ÏÎÎ...–”%¯Æ£,-\½—I)•››»þ?effZ­V¾Y"K"/@YÊ0Œ¯¾ú* 11ñèׯ_``àŽ;LO¡ ¢Ì¢,eFiiiHHˆãñÿ÷ÇÄÄh:™ID™EYJäuÝ"³p±‹,ŒFY:àm´°X,^^^Wü”Ýngš%²ÔòåËSSS[emܸ±ªªêç?ÿ¹‰A”Y"KY,–††??¿–W­Zµxñâ;wšD™%²”CEEEnnî©S§¢¢¢~ö³Ÿ…††šAD™ETVVvæÌ™#F>|Øq¤²²rÆŒÍÍÍÛ¶mãD™EYJ)uêÔ©¸¸¸ÐÐÐßüæ7Ï<óŒÅb1=‚8HjÖ©S§’’’öïß?mÚ´óçÏÿå/¹ù曹g‰,…ñÁ(ËtØkqÅ¿krÏòœRÜw RKåæævìØ±åÁ'NÌ™3çÂ… &Qf‰,Œ´±0÷ïdA”Y”¥¶:|²A•••J©+Vüæ7¿IOO×´å&ËY <Þ <݆ Zí *++÷ïßÏ:Kd© &˜þœ.ÏYjðàÁ—ÌÏÏÏÉÉÉÈÈÈÏÏgD™EYjïÞ½ååå'NÔqÆÒ%A”Y”¥œ"##×­[·bÅŠáÇkÝ Ë*++«¨¨0 £¬¬L)u÷Ýw¯_¿~ÆŒ999:.v ÉY ãƒQ–ØkQQQAs¦”2Kd)%t"²Ô… Ⱦ+ȲD–JKK»âñ„„„«}Ê̓(³(K%%%Ùl¶éÓ§'%%™ûÌ® ¢Ì¢,…­—¬=z´z Y–ÈRŒ²tÀ%ÐàÑ<çºnîY(Å( Ì%òºn‘Yûöí»Úu¦ÿC Y–ÈRàápX‹´´´{ï½—æÌY–ÈRÀ…È D–º¢3f¼ýöÛ’‚(³ôa|°RB¯ë™…‹eµ‚ñÁ(«ý°Ö¢¤¤¤¦¦FX–ÈR"w "KQÞ],Kd)¥T^^Þ‚ Μ9séÒ%Ç‘C‡íرC)µgÏŽA”Y”¥0>X)luXeýñÔBœ%¯Æ£,p 40€3HŒ²úöí;zôèøøxçe‡sæÌY°`RêÉ'ŸäD™EY Ì%rUY ãƒQ–´o;ìqªªªŠ‹‹«ªª$eI*õôÓO¯^½ZÓ“»*Kd©áÇ/[¶Œ ˆ2Kd)Ã0‚ƒƒ[‰‹‹cD™EYÊ ãÃ̓0>¸d‰\ÕE–Âø`”¥CWoÀejnnž;wntttHHHlllHHHttô¼yóš››ùf‰,… ó¸dmݺuÊ”)A”Y"K)¥n»í¶VG À:ˆ2‹²Æ‹ …ñÁ'Käª.²Æ£,p ´³gÏÎÉÉIOO8p`pppmmíž={^ýõI“&-\¸i–ÈRÀNuuõ¹sç»ví*&Kd)p,‚€#‘«ºÈRà¡\} Z¦#GŽ´:xðàÁÐÐP¾Y"K9áÂ<7ϲÛí¯¾újTT”síŠŠŠš;w®Ýnç›%²0‚ñÁ"¨%Œ7Ϲª‹,`-üýý/_^ËËËýýýùf‰,%rYY*===&&&333//¯   //oñâÅÑÑѳgÏ67ˆ2Kd)`ãƒEñÁ'Käª.²x8l€µ3fÌèÑ£[þÛpAAÁ¨Q£zè!¾Y"K‰\ÖE–Â$FYÀÆ‹ ãƒO–ÈU]d)ðpØkQVV–’’¢”²Z­V«U)•’’RVVÆ7Kd)‘˺ÈR8ƒÄ( ¸Àø`d`|ðɹª‹,7ÁÒèÈ‘#û÷ïwüÿþýãããd +eµZKKK[Ýb¡¢¢"&&¦®®Ži–ÈRŽ“* .ìÓ§ãHaaazzºÏÚµkM ¢ÌY ØÁøpÿ Œ.Y"Wu‘¥ÀÓ¹lëí>ùäyY’JáÂ<.Y8ƒÄ( ØÁøpó Œ.Y"Wu‘¥ÀÃá °^ ÝŸ0Y–¤RgÏž8qâ—_~iµZƒ‚‚l6[]]]JJʪU«ÂÃÙf‰,å€3HŒ²€Œ7Âø`”¥„®ê"KÇÂX/ {Y"—u‘¥”RÙÙÙ=ö˜¾çwI–ÈRÀÆ‹ 
ŒFY"Wua¥êëësss“““cccµQf‰,¥õ)gCù'L–%².Ìã’%òÛOX©÷ßÿò7Ù°aCvv6Ó Ê,ÊRNøVgd`|ðÉ·ºûg•––úøøDDDdff^ºtIF–ÈR:t ßr{”ššyY"K=þøã4A”Y"Kû›|xzzú3Ïk,‘¥”R¡¡¡;wîìÓ§O˃‡ºûî»+**8QfQ–.0>¸d‰\ÕE–Âø`”¥.6SÛo½oß>ŽY"K9áÂ<.Y(Å(«¡¡!,,¬ÕÁðððúúz¦A”YA,‚Z¢Ä% ¥¸da|0ÊÒg€Í”íxpüøñ¥K—¦¦¦öêÕëÌ™3|ðÁرcßxã ŽY"K€>=ôa .tþÛpaaazzºÏÚµk9Qfa|°}0>eé€ °C‡]´hÑ AƒVTT<òÈ#[·ne%²¸9œAb”åtöìÙ‰'~ùå—V«5((Èf³ÕÕÕ¥¤¤¬Zµ*<<œcee)ŒAàþD®ê"K9a|0ÊÒ—@kqàÀ–?ÌaaaçÎãž%©”Èe]d©ôôtǃ+žl1+…8Kd©–ÂÃÿøâ‹#GŽìß¿ßñîýû÷çD™EY ãÃ̓0>¸d‰\ÕE–rÂø`”¥6ÀZÄÅŽûî»Ï=÷œãÃ-[¶è;ÓN–%©”Èe]d©Ç{Ìñ`èСÿûß'[~ùË_>òÈ#&Qf‰,u¹øøø={öLžød‰\ÕE–rÂø`”e"–/Úý eé€ °ùX]]ÝÆ”””°ËY tHKKD™E„ñÁ"tÀø`”¥.6_\\Ü;ï¼sµÏ>ñÄgÏže—%²€ÇÂø`¦Ã`óýèG?ºï¾û®öÙAƒqÌY ¸À$FYÀÆ‹ `Däª.²΃繬‹,…3H,²’’’Úøì¾}ûÚAD™EY xÁøà’%oU§ ¢ÉÂø`”¥ÎëU]]}îܹÀÀÀ®]»ŠÉSÊf³eee]í³O<ñÇ,‘¥p‰EVzzºãÁñãÇ—.]šššÚ«W¯3gÎ|ðÁcÇŽ5%‚8ˆ2‹²Ôå0>Ü9ãƒK–¼U2ˆ& ãƒQ–^®}b©ìvû«¯¾åüsŽŠŠš;w®Ýnç›%¯ÔC=ÔÆgï¿ÿ~ŽY"K/ÉÉÉ{öìq~X^^>|øpÖA”Y”¥0>Xa|0Ê.0>eé€ °ééé111™™™yyyyyy‹/ŽŽŽž={6ß,‘¥€ªªªââ⪪*IYÂJ´ÚÜ~ûí¬ƒ(³(Ka|°Ž„­êÄAYŒ²tÀX‹#GŽ´:xðàÁÐÐP¾Y"K9IZÖ郲p‰QVRRÒ’%KœnÞ¼ù¶Ûn3=…2ˆ2‹²Æ‹ –0>Üød‰\ÕE–R¬²Lg1 ÃÕ¯A Ù³gçä䤧§8088¸¶¶vÏž=¯¿þú¤I“.\È4Kd)`ge›Ãø`‰\ÕE–åêk°eùöî"K9UUU_þ>¬³$•²Ûí¯¾újTT”síŠŠŠš;w®Ýnç›%²0‚ñÁ"¨%Œ7Ϲª‹,`-D¾½»ÈR"—u‘¥ÒÓÓcbb233óòò òòò/^={ölsƒ(³D–F0>X|²D®ê"K‡ÃX ‘oï.²”Èe]d)œAb”\`|°20>ød‰\ÕE–‡ßÖâìÙ³'NüòË/­VkPPÍf«««KIIYµjUxx8Ó,‘¥BCCwîÜÙ§OŸ–:t÷ÝwWTT˜D™%²”Õj---mõ»@111uuu&Qf‰,5pàÀêêê6¾ ¤¤„Wee)…ñÁ$Ha|ðɹªË+…ñÁ(Kl€5"{ËuÊ,a¥ä-ë”A”YŽ“* .tþ ¦°°0==ÝÇÇgíÚµ&Qf‰,÷Î;ï\í³O<ñÄÙ³gyQfQ–rÂøpÿ Œ.Y"Wuy¥0>eéâªSÏBҭ胲pa—¬²²²””¥”Õjˆˆ°Z­J©”””²²2sƒ(³D–jûýý÷ßÏ.ˆ2‹²TKn„ñÁ%Käª.¯Æ£,M°ÖBä­D–’·¬Sg†QXX¸råÊ÷Þ{oåÊ•………:"è³D–0>X¬² ¡«ºÈRà±p ´xÏCFY æ±Êù>„"KQf¡ÔÁø`ä„ñÁ( ‹—,”b”e&WïÀeyÇ<‘¥œpaž›gá ²\D™EY ãƒEPKnž…E‰KJ1ÊÒ`-ðž‡\²D."K‰|à ‘¥(³Pª0>X|²°(qÉB)FY:`¬…È[;ˆ,%r±Y gå Ê,ÊR,‚ Œ>YX”¸d¡£,°ÖBä­D–¹Xˆ,…3HÈraee)ŒAÆŸ,,J\²PŠQ–¸ –F"oí ¬”È÷!YJÞûRIÍB)S`|¸Æ—,,J\²PŠQ–ä[n7‚ ó¸dá ²\D™Eü~-ÀÆ—,,J\²PŠQ–8l¾~ýúýä'?¹é¦›^|ñE1Y"K)¥Îž=;qâÄ/¿üÒjµÙl¶ººº”””U«V…‡‡3ÍYÊgå Ê,‚ ŒAŒ²%>Y(Å(Ë\Ø›ïĉ¯¼òÊöíÛ;&&Kd)'‘‹…ÈR`ŒA-a|0Êw† 0¸;œAb”¥”JKK»÷Þ{'L˜ &ˆ2‹²˜Häª.²”Âø`•¥CW¿—éׯߴiÓ^{í5IY"K}þùçË—/×D™%²”CIIIMM¤ Ê,ÊRÀÆ—,‘«ºÈR ãƒU–8 ž 
æ1ÊðLŒ²€l€@‹êêjǯ۵z÷¾A”Y”¥<Æ£,áh0Sssóܹs£££CBBbccCBB¢££çÍ›×ÜÜÌ4ˆ2‹²€gÂø`”¥ƒ·«_€puuuAAAÞÞrþ¨E–³¼øâ‹999/¾øâÀƒƒƒkkk÷ìÙóúë¯Ûl¶… r ¢Ì¢,å$rUY LñÁ(K —¾ ±dï¼óNBB‚ÅbQJuìØqذaùùù:‚Þÿ}»ÝÞêà† ²³³MÏY ÌräÈ‘V<Ê4ˆ2‹²”Aµª/é"K€‰0>eé€K µ˜7oÞŸÿüç &üሌŒüÕ¯~Õ·oßáÃ‡ïØ±Ãô¬É“'755µ:h³Ù–.]jnÈRÀNcccUU•Ýnwõ 1“°R aaa­†‡‡×××3 ¢Ì¢,E¶ªS.é"K;ÂVuI¥0>eiáê¸Lp<þ׿þuë­·†ñæ›o:Ôô,¥Tnnîúÿ”™™iµZÍ YÊ©¡¡¡²²²©©IÓó»„°R"/@Yj̘1£GnùoãFz衇˜QfQ–"[Õ)—t‘¥œ„­êÂJ‰\Õå•Âø`”¥6ÀZtîÜùâÅ‹ŽÇõõõ†aœ8qÂÏÏÏô,¥”×U˜$²”!qY7$–š;wnddäË/¿üòË/GFFΜ9óÙgŸµZ­Û·o77È0 ¥TCCC«ƒ+W®2dˆ¹A"K†QVV–’’¢”²Z­V«U)•’’RVVÆ4ˆ2‹²ÙªN¹¤‹,e½®£ª=0ªÚãƒQ–x$-’““ÇŒó /†1þüÏ>û,//ïäÉ“IIIÕÕÕæfY,–††???sŸör"KÍ›7ïÝwßMKKSJ-_¾|âĉ/^üè£6nÜ8tèPs³®XjÕªU‹/Þ¹s§‰A"KEFFnܸñÖ[oUJíÝ»÷‰'ž8pàÀ[o½•››»}ûvƒ”R‹%77·cÇŽ-ž8qbΜ9.\01Hd)§#GŽìß¿ßñîýû÷בBD™ED¶ª“-éJh)²UlIWUí†QÕ~Œ²Ì… °yyy÷ß¿RêâÅ‹ŸþùСC7oÞ¼bÅŠ?üÐÜ,²,²”Èe]d©€€€ªª*Ç·_CCC÷îÝm6ÛÉ“'ûôéÓÐÐ`bRÊb±xyy]ñSæþî“ÈR-á=Ý<ˆlU§Ü+Š,E¶ªSn?0ªÚ £ÊŒ²ÌäÊÓÏ¢>}úã?þøãO:¥5¨¢¢Bëó·$¯”ÈëºE–2dÈ‚ øtéÒ¼yóîºë.Ã0Nœ8Ñ¥KsƒŒ«\‚¥ƒÈR†aØíöW_}5**Ê9h¢¢¢æÎ{ù%ˆ\‚(³(KT«:åœ2$–y]7FU;aTµÆ£,°&rYYjçΡ¡¡¡¡¡Ž_=Ú´iÓ“O>izJµSzzzLLLfff^^^AAA^^ÞâÅ‹£££gÏžÍ4ˆ2‹²pA¶ªS.Uí„RíñÁ(Kl€µxúé§W¯^-,Kd)‘˺ÈR†Ä  ¡¥ðž‡,‚ Œ&AáªN¹¤cTµFU{`|0ÊÒÁûÆ.œ†¶•””ÔÔÔËYjÈ!………[·nUJ ><22R)5räÈ‘#GšžUQQAóÛb"K)¥"""~ñ‹_…††¤8ˆ,…÷˜)ÂUrIǨj'ŒªöÀø`”¥…«wà׆3HŒ²ðž‡,‚Àt"Wu‘¥0>eéÐÁÕp™4½­HÛª««KJJLG"'‘¥ÒÒÒÖ¬Y£éÉ]•%²Î 1ÊZ¶l™Íf‹ïܹsdddçÎêêê²²²˜QfQ–¢_Õu/éJh)Œ.Y"Wu‘¥0>eé€K µ˜5kVFFF˃O=õT·nÝ^{í5s³š››çÏŸ¿lÙ²S§N9ŽDEEM:õÅ_¼Ú­äoŒÈR"—u‘¥—ÉÑ ËYJ)þÅ_¼= Yee)²UlIWBKa|pɹª‹,…ñÁ(K WŸ‚–I)Õ³gϹsç¶<¸eË–ÄÄDÓ³ÈîÃ&²pqþüyúЪªªââ⪪*MÏ/²0B¶ªS.é"K"Wu‘¥ÀÃa¬…Rª°°0::zÉ’%΃gΜéÔ©“éYd÷aYJä².²”RjÖ¬Y­¦¦¦Î™3Çô,²w·Y*!!aêÔ©óçÏ7÷i]D™EYÊ \Õ)ï*²ýªN°ýÀ¨j'Œª†ñÁ(Kl€µPJ544uïÞ}éÒ¥Žƒ»wŒ4=Ëßßÿò¥¼¼¼ÜßßßÜ ©¥„-ë†ÜRò.@Yª¤¤äÉ'ŸŒ‹‹3÷i]D™EYÊ \ÕÉ–tCn)šUlI70ªÚ £ê†a|0ÊÒ`-Ô¿ßqîàÁƒaaa?üð¢E‹úöí;mÚ4Ó³ÈîÃ&µ”°eÝ[JÞ"K#d«:åýB¥–’w]7FU;aTÜ0l€µðõõmllt<...7n\llìã?n³ÙLÏ*++KIIQJY­Öˆˆ«Õª”JII)++37Hd)‘˺ÔR"/@W ![ÕÉ–tCh)‘×ucTµFÀ ÃXˆÂ•+W¾÷Þ{+W®,,,tõË1A)‘˺àR†¸ 䕸̩&õºnŒªöÀ¨¸aØkTYYÙÜÜLGsÇ}Ú0ŒÓ§O‡„„˜žEvk‘¥„]×í ¬”È D–rxçw,‹RªcǎÆ ËÏÏ7=åý÷ß¿üõoذ!;;Ûô,Cb)²UrIYÊAØuÝU7 £ªh–tƒvUYJl€µèܹóÅ‹ ÃhhhPJ]¸pÁ0Œ 
.˜žEvk‘¥D.ë"K=ðÀ=öX^^Þ#<’’’’’òÿµw÷¡UŸ÷ÿÇ/ktmîŒIÔ,'1i'Í™÷“ l*–.kÇVV+.8ÐÒÄ:ÖI¡ÞÌX`MÚU¡uZ‡usW²?¾eh[a˜´H«ÎšdehîL0&6&1M¼¾vÈ×Õüøó9×§Ÿ×çùøcd'’ëz1÷º|ç\çœÇvïÞýè£VVVz»uø’¡¬µuuu‘H¤¶¶¶¶¶6‰¼üòË/¾øbFFFcc£· ™ÿ¼†p¢#GŽ”——{» å¬Õ]¾µ’d(É{ÝUIâ¨J†³J·[]2TŠ0§Ä²eËöíÛg­Ý½{wIIÉK/½ÔÒÒòÒK/­\¹Ò󵜽µƒd(ÉZ— %yA2”µ¶°°ðÂ… ±¯?úè£E‹Ykßxã+Vx»1¦¡¡áþ¯={ödddx» å¬Õ]¾µ’d(g­îò7eUIâ¨J†³J·[]2TŠ0§DCCCZZZFFFNNÎÅ‹—/_nŒ™5kÖ™3g<_ËÙ[;H†’¬uÉP’$CÙ ¹¬µ·nÝÊÎζֶµµÝÿýÞ.dŒ™zÞ.dEC9ku—oA$Jò^7GU’8ª’á¬Ò­ÃV— •" À©ò¯ýë/ùK¬ÐïܹÓÑÑñÅ_¤n97J»Ö•BI^@ e­-//ýõ×­µwîÜ©¯¯ä‘G¬µmmm3gÎôv¡/½‚•"’¡¬ÛVwöήz¡$ïusT%‰£*Î*Ý:luÉP)Â,¢½½½±±±³³ÓïxÉA(ÉZ— %yA2”µöÔ©SÙÙÙùùùùùùYYY±W?~¼ªªÊÛ…\À’¡ãœJ˜ä½nŽª$qT%ÃY¥[‡­.*E€½W]]=Éw×­[çírƒƒƒkÖ¬™2eJì=ß*++c¿Fõ–d(+Zë’¡¬â+ÊZÛÕÕuèСC‡¥t0èííMÝÿob¡\¶º³J— eEïusTy‚£*an*ݺ=ª$C¥Ÿì½yóæ>|ø^ß]½zuww·‡Ëmݺµ¹¹ùí·ß^¹rå{ï½÷ /”••íܹÓÃ%Œh¨˜O?ýôüùóååå………ÖÚ®®®‚‚‚Ô}rš›Ï!” åXGGG[[Ûƒ>‰DüÞ‹g$C!1.[ÝY¥K†ŠqÙêÎ*£*y’­. _-¾ŽßšJJJ2&åír=ôP{{»µ¶¬¬ÌZÛÝÝ]RRâíV4”/¸˜É ’¡b~úÓŸ=z4E?Ü—…\®ål!—­î¬Ò%C¹'yNYŽª„H†²Z+Rõk¶0»råŠËå®]»V\\ÿ¯¹¹¹ƒƒƒž¯"jÆ ¼×wׯ_?É3 ªªªjhh0ÆXk+++ÿð‡?Ä.byH2ÔÉ“'OŸ>}¯ïž8qÂÛå~ó›ßŒŽŽöôôÄŸØ©©©ñü‰ÉP1W®\¹qãF*~²_ ¹\ËåBV‰qSéF4”ËVwSé†£Ê UÉàøÐZ©ÀèÀ+))9wîÜÌ™3£Ñè'Ÿ|R[[{îܹwß}×ï}%ÅM(É{Ý’¡JKK¯_¿>Éòp¹o|ãÿûß‹‹‹£ÑhKKKOOÏòåË=ÿ‡µd(àKqN%Cò^7GUò8ª€Äùúü3<ðì³ÏÆ.!Ìž=;++kñâÅ—/_ö{SÉrJò^·d(Ç233c_ÄBŽŽæææúº#¸ 588˜¢Ÿ|/}}}—/_þï9ñd(g8§’!y¯›£*yUÉp_é6õ­.*E€oll,ö^|ÇŽûøãÇÆÆüÞ‘$CqVÅܹsûûû­µeeeccc555O=õ”ß›J–ËPƘM›6Ýõ`uuõ/ùKo{å•WŠŠŠâ¿¹þßöÈIDATÒ-**ª««KEcH†rF²Ò%CIVºÍÅQ• g•n¶ºd¨aᬠž­J’1fîܹuuu|ÿý÷.\èíB›7o.--ݳgOSSÓ¥K—šššvíÚU\\¼eËo²¢¡€»HVºÍÅQ• g•n¶ºd¨aÖÑßßÿ‹_üÂï]xL)gUPH>±ã2”1¦¹¹¹¸¸x÷îÝñ»»»xàoÊËËkii¹ëÁþóŸùùùÞ.dEC¹§TéqJ¡$+ÝŠæâ¨J†³J·[]2TŠ0ëèèèÈËËó{S ÅY…0ÆŒŒŒ´¶¶ìÝ»7öà‡~‰D¼](==ý¿_wtíÚµôôto²¢¡ÜSªô8¥Pª•®š sVéÖa«K†J‘ûRóÖZî6uêÔ´´4cÌ÷¿ÿý¥K—N:Õïy@2TÜ76nÜè÷.<æ,ÔÃ?üþûï×ÖÖ®]»v×®]Ï=÷ÜÓO?ííO<ñDuuukkkü‘æææªªª'Ÿ|ÒÛ…â$Cqª•®š+†£*a*Ý8ouÉPžc¼žÿèííµÖƾîëëó{_I‘ ÇYÃÃÃþóŸýÞ…Ç„úÚ×¾6eÊcÌÂ… ?üðñ±±;w~ûÛßþíoëíBû÷ïˆF£™™™‘H$33sþüùÃÃÃo½õ–· ÑPÎHVºd¨8ÉJ7¢¹8ªà¬ÒÃV— •"|pàÅ~©3>>û½æÂ… Ï;çߦ’%*®³³séÒ¥“°^à*(ôBµ´´œ?þæÍ›ÙÙÙK–,‰F£~ïÈb¡$+]2Tœ^QÄHæ"T ˆµzLpC¥ý¿ÿ¾ÚÆÆÆb_(•…d(EOOOì‹ø;ƘiÓ¦åååùº¯¤¸Õ××7sæÌûîKù=£h4:{öìØœ›››Òµ$C9 Yé’¡ UÉsVéÆa«K†òW 
G$ïËI†*úï|ç;ýýý±¯+**üÞWR\†jmmF£ùùùóæÍû÷¿ÿý§?ýiÁ‚Ï<óLü_6^¯««+..ÎËË{ðÁóòòŠ‹‹ëëëÇÇǽ]Ȉ†î"YéF4GU2œUºqØê’¡R„+Ð:$ ­Jò¾œd¨8¥¿~qBýèG?ÊÉÉùùϾ{÷î«W¯c~üã9r¤°°ðèÑ£.´eË–wÞygóæÍË–-ËÉÉùì³ÏΞ=»cÇŽŸüä'Û·o÷p!#Ê=þ?õ§Z骹b”þÆ¥:”³J7[]2TªøñÖÓH‰W_}Õï]xL2”ÒgfÄ*(„ÊÍÍíïï·Ö^»vÍÓÕÕe­íêêò|]—ŸC(Ê=ÉJ— %Ù~V4¡à¬Ò­ÃV— •"\Ö‘ý«_ýÊï]xL2 íöíÛƘ¬¬,cÌŒ3bÿyûömo™5kÖ]Ι3çÖ­[Þ.dDC¹'Yé’¡mÎ*Ý8luÉP)¬£¿¿ÿÊ•+ýýý~oÄK’¡ÙÙÙ›6mò{sêá‡>xð 1æÀ%%%555­­­555ßúÖ·¼]ÈåçJ†rO²Ò%C!@8ªà¬ÒÃV— •*~?d½òÊ+EEEñÿM‹ŠŠêêêÆÆÆüÞZâ$CÅIÞ—“ …Ä444¤¥¥eddäää\¼xqùòåÆ˜Y³f9sÆÛ…zzz{ì1cLFFFaaaì7ß=öXOO· YÑPÎHVºd¨8ÕJWÍ…ÿ_Î*Ý:luÉP)›`^à_†þe$C!púûûƒøæþ“sêÓO?=þ|yyyaa¡µ¶«««  `âûÐxÈÙçJ†rC²Ò%C!ˆ8ªã²Ò«V— •¾Žßð@Ð_†þ¥$CÅõõõ]¾|¹¯¯ÏïxI)”ä;’¡bÚÛÛ;;;ýÞˆ—ÄBIVºd¨8¥JŸH)—d«K†Škõ˜à†b¼ôôôÿ®òk×®¥§§û²OH†’¬uÉP›7o.--ݳgOSSÓ¥K—šššvíÚU\\¼eË¿·–8g¡ª««'ùîºuë<\kpppÍš5S¦L™2eŠ1¦²²rhhÈß'Ê%ÉJ— %YéV4GUÂ\VºuÕê’¡R‡+Ð÷Ì3ÏXk·oß^VV{¤¹¹yóæÍÓ§Oÿë_ÿêïÞ&Jò¾œd¨üüüS§NÅÿîÅ\¼xññÇïííõkWIrjÞ¼y‡¾×wW¯^ÝÝÝíÕZ[·nmnn~ûí·W®\ùÞ{ï½ð eee;wîôêçÇI†rI²Ò%CIVºÍÅQ•0—•n\µºd¨òuü†‚þ2ô/%Jò¾œd(É'vœ…*))ɘ”‡k=ôÐCíííÖÚ²²2kmwwwII‰‡??N2”K’•.J²Ò­h.Žª„¹¬tëªÕ%C¥Ï‹ðËÐïM,TFFFGGÇ]oçÐÛÛ[ZZ:<<ì×®’$Jò‰ÉPYYYƒƒƒÆ˜h4ÚÒÒrûöí¯ýë}}}~ï+)’¡bÄ*=F,”d¥Ñ\’­.J²ÕÊ×ñ^¹~ýú_|á÷F¼¤jÕªUO?ýôÄ_B_ºté‡?üa¬îJ2”ä;’¡æÎÛßßo­-++«©©yê©§üÞT²$CÅ)UzœR(ÉJ·¢¹$[]2”d«=°‚ßýîwóçϽ }Ú´ißýîwOŸ>í÷¦’¥J²Ö%CÅ4779rä÷¿ÿý‘#Gš››ýÞŽ7ÄB=ûì³GµÖΞ=;++kñâÅ—/_ö{SÉ’ e+Ý*†R­tÕ\V®ÕcÄBI¶zÐCq:ðêëë÷íÛ÷üóÏc8°víÚÑÑÑ?þñÇŽ[±b…ß»Kd¨±ûr1’¡Œ1ŸþùðððŒ3R÷zî)…·Ö¦¥¥?~|Μ9‹-š:uªß›J–d(ÉJ— £Z骹”Z=N)”d«>”¯ã7Ço$ïæÍ›ñ· ˆF£mmmƘ5kÖœ={Ö×}%E2”1fïÞ½ ,HOOÏÏÏOOO¯¨¨8sæŒß›J–^¨úúúW_}µ²²ò׿þu$Ù¸qã7¿ùÍŠŠŠüã~o-q’¡ânܸ±qãF¿wá1¥P’•.Ê(VzŒ^.ÉV— §ÔêqA å÷Žd•——¿þúëÖÚ;wîÔ××?òÈ#ÖÚ¶¶¶™3gú½µÄI†ª««‹D"µµµµµµ‘Häå—_~ñÅ322ýÞZâ$CI>±#*®££#//Ïï]xL)”d¥K†’¬t+šK²Õ%CÅ)µz\@C1Þ©S§²³³óóóóóó³²²bm~üøñªª*¿·–8ÉP’µ.*33stt4öõ­[·²³³­µmmm÷ß¿¯ûJŠd¨¸€À“S %Yé’¡$+ÝŠæ’luÉPqJ­ÐP¼ –‚«W¯~ðÁÆ˜ŠŠŠH$â÷v¼¡*++«¯¯oúôéÆ˜‘‘‘‚‚‚ööö²²²‘‘¿w— ÉP>úèªU«¶nÝj­}íµ×Þ}÷ݦ¦¦ööö¥K—ö÷÷û½»I†êéé‰}ÑÝÝý½ï}ï“O>1ÆL›6-//Ï×}%E2”Q¬t£J²Òh.ÉV— %Ùêåëø „ˆä}9ÉP’OìH†š:1&öÅ’%KüÞWR$C!($+ÝŠæ’luÉP’­ôP ÀÁV]]=Éw×­[çl'’ eEk]2”µ¶««ëСC‡êììô{/ž‘ Ð+X““ %Yé’¡¬n¥«æ’luÉP12­>Q@Cq:ØæÍ›wøðá{}wõêÕÝÝÝ.÷ã 
ÉP1z÷åŒh(KggçÒ¥K¯_¿î÷F¼$J²Ò%CŨVºj.ˆL«OÐP ÀÁVZZ:ùß¹¡¡!g›ñŠd(ņ ¿7@"FFFfÍšu׃sæÌ¹uë–/ûñ„d(-ÉVz(`@ =ñÄÕÕÕ­­­ñGš››«ªªž|òIw•$ÉPZ’­ôP À€@Ú¿ÿÀÀ@4ÍÌÌŒD"™™™óçÏ~ë­·üÞZâ$C@hI¶zÐCñ`@€µ´´œ?>ö1 K–,‰F£~ïÈ’¡ ´$[=¸¡€ÁöùçŸϘ1#-Mç£ $C@hI¶z@CqT{÷î]°`Azzz~~~zzzEEÅ™3güÞT²$C@hI¶z CiX ®¾¾~ß¾}Ï?ÿ¼1æÀk×®­¨¨8vìØŠ+üÞ]‚$C@hI¶zÐCqH‘H䨱c‹-2Æ|üñÇÏ=÷Ü… Þ|ó͆††ÆÆF¿w— ÉPZ’­ôP À€@ÊÊÊêëë›>}º1fdd¤  ``` ½½½¬¬lddÄïÝ%H2„–d«=¯ÒâÅ‹wîÜiŒ±Ö¾ùæ› ,ˆ=þÀøº¯¤H†€Ð’lõ ‡â5À€@Ú±cÇ~ðƒ;vcFGGÿö·¿cZZZV­Zå÷Ö' BK²ÕƒŠ+Ѐ ºzõê|`Œ©¨¨ˆD"~oÇ’¡ ´$[=С€¡Àk€Á³aÆI¾»~ýzg;ñd(-ÉVÅk€ÁsòäÉÓ§Oßë»'Nœp¹¯H†€Ð’luP\Oiiéõë×'ùCCCÎ6ãÉPZ’­.Š ¼ À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0B À€P`„0 €¡À `@(0BáØ ³Ÿ3×IEND®B`‚sleef-3.3.1/doc/html/trigsp.png000066400000000000000000001227071333715643700163700ustar00rootroot00000000000000‰PNG  IHDR óúX¬bKGDÿÿÿ ½§“ IDATxœìÝ{\”eþÿñkå0ˆÈA@"jbhEnâ‰Ô²Ô2W] V(ËJQ²¶]Y»ðÚZHé`Š®âæ·òPž:¨-&n”gCTp€C`~Ü¿ïÄaî1ffæõ|ìs_÷u_×çº'ÛÞÞ‡Qh4€µ»©» ÀÀ›@Ø0À&€6 ° `€M l`À›@Ø0À&€6 ° `€M l`ìV­ZÕÝ5 ¢¢¢„·Þz«™í.&ª™aЃ+À³:wîÜôéÓ½¼¼œœœ† ’ µ§¤¤DGGwom&uûí·gggk7µ^ Û†e @gì»»€ Ñh4S§N7n\~~¾‡‡GQQѤ]3gÎìÞÚÌÌDëµâa¯_¿Þ«W/S”°\˜Ï¥K—JJJ–,YâçççèèöÔSOI»¢¢¢²²²¤Ï#GŽ|öÙgcccÃÃÃGŒñÙgŸIí.\¸çž{ÜÜÜBBB6oÞ¬P(®\¹ÒfŠŸ~úéÉ'Ÿ òðð˜6mZiiiû2:ìSYYéííýÞ{ïI},X0~üø––=cÖÕÕ=ù䓃 êÓ§OxxøÉ“'…þþþŸ|ò‰Ô¡¢¢B¡P¨Õê§žzª   55uÈ!³fÍj³ÞK—.Í™3ÇÇÇÇ××wÁ‚555úσ–þaG޹~ýúqãÆ999EDD|÷Ýwo¿ývPPPß¾}/^¬Ñh ?c†|;º œZwØOæÈ‘#SSSǺgÏžÎN”®W_}uРAnnnÞÞÞË–-Ó¿Æ'X10À|ú÷ï?|øð%K–ìØ±ãüùózzîÙ³çý÷ß/((HII™;w®ÔøàƒúøøTVV~ñÅÚ¤ÚÆþð‡~øáäÉ“•••#FŒˆomm5¤¯¯ïÖ­[/^|öìÙììì}ûömÛ¶ÍÎÎNϘóæÍ+((8räH]]ÝîÝ»½¼¼:[NFFFxxøK/½tþüù;w¶Ùûàƒ666ž;w®  àâÅ‹óæÍÓ V±yóæ—_~¹¦¦&""âþûï?zôh~~~^^^NNÎ|`økCU†O­«³“¹k×®mÛ¶={vÖ¬YzN”¤°°055õã?V«ÕEEE=ôþ5þ ¬„3ª®®~þùço¿ývÿ×^{Mj=zô›o¾)}Žˆˆxùå—¥ÏÒU¾‹/~ÿý÷Bˆêêj©ýÈ‘#BˆË—/ë[^^.„(++“ú477»¸¸œ9sF·ý}þò—¿ :´OŸ>Ÿ~ú©þþÒUÄóçÏ·Y ŸŸßÁƒu½zõªF£¹í¶Û¶lÙ¢í¦­ù‡~B”––JíBˆü±³óÐfºÎ†•饗¤ÏÒEi©F3{öì?ýéOž±öÃÊVeÈÔºÃvv2#""6lØ }Ös¢´ŠŠŠ¶nݪV«µ7ú ¬ÏÌÊÝÝ}Íš5kÖ¬¹víÚÖ­[-Z(•J¥Réææ¦Ý¬¯¯×³:Ý3Öž!UÉN­«¤¤¤³“éçç'}Т$ƒ Ú¾}ûk¯½¶hÑ¢#FüéOº÷Þ{Mý 
,Ð=Ö¯_úôéö¸=??¿Ÿ~ú©¦¦FÊÀ>ª*¥âÏ>ûLϽ¬zú´¶¶ÎŸ?Ú´iÇß¼yó#<¢§YYYsssQQQpp°n{Ÿ>}®]»&}Ö}Dù¦›:~ì(  ¹¹¹¢¢Âßß_QTT$5vV k CΘux2… …Bú`à‰š>}úôéÓùå—ìììxàÊ•+7ú ¬ÏÌçòåËË–-;uêT}}}mmí¦M›JKK ü!œ!C†Üyç)))W®\Y½zuû>qqq‰‰‰Ú{wìØÑÜÜl`ŸuëÖUTT¼óÎ;Û¶m“Þ/¥§``à´iÓ’’’ÊËË5Maa¡Én»í¶½{÷ !š››ÓÓÓµóúúúªTªö5;véÒ¥µµµ555)))S§NÕ½ª©_gÃÈ3fL]†œ¨Âƒ^»vÍÁÁÁËËK¡PØÙÙÝè7¸sçÎÌÌL³­`N`€ù899ýüóÏ¿ÿýï½½½7oÞüî»ïŽ7ÎÀÃsrr*++}}}ÇŒ#½ßÈÁÁ¡MŸwÞyçæ›o7n\Ÿ>}Fõá‡j/!êïsèС´´´;w*•ʘ˜˜+VÌš5ëçŸÖ3æ{ï½wóÍ73ÆÕÕuÆŒÒõÞ^x¡°°pøðáwÝu×í·ß®tÅŠ{öìéׯŸî½¸ÚuÙÙÙÝ|óÍaaažžžï¼óŽá§Tϰ2䌙A‡'³ ÙuíÚµçŸÞÇÇÇÝÝ}ݺu»vírvv7ø î߿ǎ¦_1 (4ÿý),È¡C‡fΜÙáátˆ+À‹qêÔ©ÂÂB!Ä… ž{~¸»+–„—`,ÆÅ‹gΜyõêÕ>}úÜwß}/¾øbwW, ·@l·@l`À›@Ø0À&€6 ° `€M lB·à–––åË—{yy)•Êøøøªª*Ãû¸¹¹)~­¢¢Â¼å,L·à 6äää>|¸¤¤¤©©iöìÙ†÷©ªªjü¯G}tìØ±þþþæ-`a¦[&ö÷÷OMM}â‰'„gÏž ;wî\HHÈ õihhðóó{ýõ×çÌ™cæú–¥{®WWW_¸p!22RÚ uvv>sæÌöÙ¾}»½½ýÌ™3ÍS6ÀrÙwˬuuuBˆ¾}ûj[ÜÜܤÆê³iÓ¦… öîÝ[Ï\MMMF)Ð-Œrór÷`WWW!Dmm­¶E­VK†÷ùúë¯óòò¶mÛ¦®¦¦&³Ýæ­P˜ï–r³Íe•‹²Ö¹¬rQÌeA1—eÍe•‹²Ö¹¬rQÌeA1—eÍe•‹’æ2Ê8Ýs ´‡‡‡ŸŸß©S§¤M•JÕÐÐqC}6nÜ8qâÄÁƒ›­l€åê¶·@'''§§§«Tªššš+VÄÄÄHo·Ú¹sgff¦þ>BˆºººmÛ¶%''wWýËÒmxåÊ•<ðÀ˜1c ENNŽÔ¾ÿþ;vèï#„xçw\]]ãâ⺡t€ê¶ŸA2+¾ ž§˜«»&b.ËšË*Å\4sYÐDÌeYsY墘˂&²Ð¹ìV­ZÕõQz²Õ«W›sãÆ³¾¹¬rQÖ:—U.й,h"沬¹¬rQÖ:—U.й,h"沬¹¬rQÆŠu\ôhÆŠuÝö 0æD“ˆŠŠÊÊÊêî*ðì»»fuîܹgžyæ‹/¾øé§ŸüüübbbÞzë-!Ä”)S† –‘‘¡ÛyÊ”)û÷ï×m9xðà¤I“ lŸ\RRÒÔÔ4{öl©gí3súBüø£¹gB€›réÒ¥’’’%K–øùù9::†……=õÔSú±³³sÔqÓM7ÞÞ«W¯öFEE…„„ôë×/ @¥Rµï³yóæ‰'úøøDFF.^¼øóÏ?ïp-sæÌññññõõ]°`AMMÔ>räÈgŸ}6666<<|ĈŸ}öY›5Í£>º`Á‚ŒŒŒ°°0¥R9lذ_|QJ›z†MMM7n\hhèž={~úé§'Ÿ|2((ÈÃÃcÚ´i¥¥¥®T{ ´ƒƒƒtNZ[[·mÛ–””ÔáÙ~î¹ç.\øôÓOÞ}÷Ýúß´iÓòåËÃÃý¼¼ÒÒÒŽ9"ÏÎÚm°!ýû÷>|ø’%KvìØqþüùî*#''ÇÇÇÇÅÅå…^X½zµþÎ_|ñŨQ£Ú·?øàƒçÎ+((¸xñâ¼yó´»öìÙóþû襤¤Ì;·ÍgÏž-))Y°`A›v…B¡Ø]»vmÛ¶íìÙ³³fÍúÃþðÃ?œ|øð¢E‹: ·ZÿøÇ?:”––Ö¦½¨¨èèÑ£¯¼òŠ›››‡‡GzzúG}TYY)í]´h‘———4QYYYUU•î±—/_BøùùµŸNÿ°‰‰‰¾¾¾BˆŠŠŠÝ»woܸÑÛÛ»wïÞëÖ­ûþûï ôÇÿÚ´iÓÂ… {÷îÝ~WmmmssóöíÛß~ûí‹/N›6íž{î¹páBg‡×ÕÕ !úöí«ÝëææVWW×Y»!åY70`[ÜÝÝ׬Yóïÿ[­V¯ZµjÉ’%ÅTÉïÿû|nnn†·?þ½÷ÞSü—Z­–ú899ùúúFEE=ýôÓ÷ßuuu‡S¿òÊ+ëׯ?|øpPPP›]åååöööÒæàÁƒ¥FiSû.+'''!D}}½î±R6n, V›™‹‹‹ Ellì°aÆ 6|øpgggmNÖã믿ÎËËÓÞÀÜæä(•J…B‘˜˜8jÔ(''§•+W:88|úé§.½L÷åXjµÚÕÕµ³vÙò¬°QŽŽŽ <}ú´žn...þ:´ÏúÒîèè8wî\ÍiC²–F£ihhè0:®Y³&==ýÈ‘#Ço¿7  
¹¹¹¢¢BÚ,**’ YxhhhPPÐ;ï¼Ó¾ýÃJ÷H !(„øì³ÏÎýWUUÕäÉ“e§Þ¸qãĉ¥\-„hsrœœœBBB´¥é´“¶?ÜÃÃÃÏÏïÔ©SÒ¦J¥jhhˆˆˆè¬Ý“cÝÀ€ ¹|ùò²eËN:U___[[»iÓ¦ÒÒÒèèhioKKË5¦}cKKK‡µíúµ¶¶®_¿þìÙ³µµµùùùIIIAAAÆ Bìܹ333Sê–’’òÖ[où¤žZZZ¦OŸîããÓ«W¯Ì™3çüùóÒ®„„„‰'JE¶ÜÁÁ¡ýP•••³gÏîß¿¿··÷¼yó._¾,µGDD¼ûî»ÒçÆÆF!Ä÷ßßþð“'OÞwß}îîŽ!!!©©©Ò† «ÑhêêêRRR‚ƒƒ]\\8þüæææ6SŒ=úÍ7ßÔnþýï÷õõ½~ýºžóÓÚÚºvíZ___—èèè/¿üRÿáÍÍÍË–-óððpvvŽ‹‹»xñ¢þv e¬X§Æ²b …õ¯¬˜±b}ׇƢèøW!Eó†éëÀñ 0À&€6 ° `à·Û¿ÿ]wÝ%}ŽŠŠÊÊÊêÞzÚë™Unúôé[·níî*~;K?ÿV† kpîܹéÓ§{yy999 2$!!A7þü6=Çÿøã !¦L™¢P(6oÞ¬Ýõí·ß* isÊ”)O=õ”žI5ÍÓO?½zõji3%%Eûƒº=‡±ªÚºuë˜1c\\\ìíõ&ÝüqÖ¬Yžžž®®®111yyyímiiY¾|¹———R©Œ¯ªª2|üÕ«W§¦¦655u} m|õÕWqqqžžžNNN¡¡¡+W®¼råŠÑgÑ=ÿnnnŠ_«¨¨hȱcÇ~÷»ß999¹»»/\¸Pÿ±[CιVðsB‘dÖÿ }Þ˜'Ùp`X1b„"))éŸÿü§Z­Ööüþûï=š”ôÿ_¹úæ›oj÷¾ùæ›aaa†Ï{àÀ¦¦¦ñãÇK›3gμ¡Ãëúõë¶«*%K–¼ôÒKmÚ“““«ªªþóŸÿTVVŽ5êÞ{ïmiiiÓgÆ 999‡.))ijjš={¶áãGDDx{{ÿóŸÿìútíÛ·oìØ±ƒ :vìØ•+Wrss5Í <¼³³Ýžîù¯ªªjü¯G}tìØ±þþþmúùå—S§N1c†J¥ÊÏÏøá‡õÛÙ¹5äœk?øË8ÉÜ3JÀ°x—.]*))Y²d‰ŸŸŸ££cXX˜tåvÊ”)^^^ï¾û®¶gffæèÑ£¥x,„¸ï¾ûŠ‹‹ „¿üòË{ï½—˜˜hø¼{öì™4i’B¡6uov­««{òÉ' Ô§OŸððð“'OvÖ¨õúë¯9R»Y\\lggWRR"„øé§Ÿž|òÉ   iÓ¦•––J}FŽ™šš:nܸÐÐÐ={ö¼ú꫃ rssóöö^¶lYûª.]º4gÎ__ß ÔÔÔhÇyöÙgcccÃÃÃGŒñÙgŸµ_ì”)Szè¡¶i?þüC=4`À¥R™œœ|éÒ¥ÊÊÊ6}6mÚ´|ùòððp//¯´´´#GލT*ÇBÄÆÆîÙ³§}ûo^ŽF£yôÑG,X‘‘¦T*‡ öâ‹/JiSϰºg»³/¥ ÝóïàààèèèèèØÚÚºmÛ6í_Äèzî¹ç.\øôÓOÞ}÷ÝúíìÜrÎm¯ÿþÇ_²dÉŽ;Ο?¯m¿é¦›µ×x›ššÞ~ûmÝÔÑ«W¯ HvíÚ5bĈ¡C‡>ï×_ÝÙÅÕyóæ9r¤®®n÷îÝ^^^5jÍ™3çܹsùùùÒfvvö¸qã‚‚‚„øÃ~øá‡“'OVVVŽ1">>¾µµUê¶k×®mÛ¶={6"""55õã?V«ÕEEE=ôPûª|ðÁÆÆÆsçÎ\¼xqÞ¼yÚ]{öìyÿý÷ RRRæÎkøIX¾|ù®]»*++þùçM›6EGGûùùév¨®®¾páBdd¤´êìì|æÌç1bÄ×_mÄåœ={¶¤¤dÁ‚mÚ¥¿ËÐ3¬ölÏš5KÏ—"kûöíööö3gÎlÓ~ýúõÏ?ÿÜÑÑñ–[néÛ·ottô矮çØÎÎm×Ϲµ"Àâ)ŠcÇŽ?þoû[xxx@@À믿.íJHH8{ö¬t­577÷úõëm’áÿøÇ­[·^»v-33sÑ¢E74ïÕ«W]]]Û·—••}ðÁ™™™T(7ß|sppp‡ºGõë×/..nË–-BFóöÛo?òÈ#BˆŠŠŠÝ»woܸÑÛÛ»wïÞëÖ­ûþû參ÖBˆÄÄD___!D¯^½4Í×_][[«T*ï¸ãŽ6U=zô•W^qssóððHOOÿ裴Wk-Z$òûï¿¿¬¬Lÿ#£ºîºë®ÖÖÖ¸¸¸ìÙ³çÍ7ßÔ^—ÔÕÕ !úöí«mqss“ äêꪽ k”å\¾|YÑ&¨2¬ölëÿRdmÚ´iáÂ…½{÷nÓ^[[ÛÜܼ}ûö·ß~ûâŋӦM»çž{.\¸ÐÙ±Û®ŸskE€5pww_³fÍ¿ÿýoµZ½jÕª%K–|üñÇBˆL›6-33S‘™™9wî\'''Ý}øðáSÇ¢E‹Ö­[7þ|‡škÔ¨Qß}÷]ûö   æææ¢¢"ÙÆ6bccöîÝ›={öl)¼IÅ~öÙgçþ«ªªjòäÉÒ!º—[§OŸþÉ'ŸTWW/X°àh“úš››µ¯–* ¸¡%·QSSSZZúøãKïR^²dISSÓñãÇuûxxxøùù:uJÚT©T 
†ÏRPP0jÔ¨6]YNhhhPPÐ;ï¼Ó¦]£ÑèV{¶;ûRæÎ«ù/77·g߸qãĉ;ü«''§í¦4îWÜæØÎÎm×Ϲµ"Àâ]¾|yÙ²e§Nª¯¯¯­­Ý´iSii©ö·g&Ožìéé9cÆŒ;ï¼3<<¼ýáñññX¹reû]---×th4ݽӧO×½:§8mÚ´¤¤¤òòrFSXXXTTÔac›íììæÏŸÿꫯîÞ½[ºÿY-...11QºyõêÕ;v477·9¶°°ðàÁƒ×®]sppðòòR(vvvº‚ƒƒÇŽ»téÒÚÚÚššš”””©S§J7ôB:ÒÛ¥³!„ðôô Ù¸q£Z­njjúÇ?þqýúu)híܹSºð.„HNNNOOW©T555+V¬ˆ‰‰‘bžnŸÇ—,mŽ=úÍ7ß”>«ÕêÇ{, ÀÅÅEzásgm !†®ÛXWW—’’ìââ2pàÀùóç777k4šˆˆˆwß}WêsæÌ™Ñ£G÷íÛ·_¿~£FúßÿýßöUUVVΞ=»ÿþÞÞÞóæÍ»|ù²Ô®;Ncc£âûï¿oS˜îïEI¤ÃÏ;7mÚ4www—[o½uÏž=Rÿ„„„‰'JŸ›››—-[æáááììwñâÅö}:ÿÌ™3 øå—_ÚŸ«®,G£Ñœø lŸûî»O©TþùÏ–r¬F£éß¿zzúüùó;œ‹[ „[ hϲoöðððóóÓ&~•JÕÐÐaH'''ÝG…¥¿ 0â_ ¬R·½:999==]¥RÕÔÔ¬X±"&&Fе;wîÌÌÌÔßç±ÇËÊÊúæ›oššš^zé¥ëׯOš4©»°öÝ5ñÊ•+Õjõ˜1ccccsrr¤öýû÷—””,Z´HOŸÇ¼¶¶vòäÉõõõ·ÜrËÇìëëÛ] Xë>–g€„g€hϲŸÀÌÀ›@Ø0À&€6¡Û~ `Ñ,îÇ ¸ ° \ /ß'7×ôuôt\Ø0À&€6 ° `€M l?ƒÀ–Èþb?`½¸ ° \Ѓ(’d:hÞ0K°F\Ø0À&€6 ° `€M l`À›@Ø0À&€6 ° `€M l`À›@Ø0À&€6 ° `€M l`À›@Ø0À&€6áÆpffæ¤I“LT ¦sc¸_¿~AAA¦©²¿¡Þ³fÍš5k–‰JÀtd®Ÿkuuõ… "##¥ÍÐÐPggç3gÎÞgçÎþþþ·Þzë_þò—††ýÓ)~mÕªURû©S§´} ëëëµ›º»t?×××v¸‹,bõðUèî2P_…­Œ0c†˜3Gßÿ {цşF`F`½#¨‡¯Â¦FÐï7Ô””Ô&Ä8—,…F£Ñ³ûÉ'Ÿ,((ÈÌÌ ...þãÿ8|øðW_}µ‹³÷Ýw¡¡¡R‹ŸŸßêÕ«uï¯ÖÓçÓO?uttôññ9{öì²eËFŽ™““Óé 2k`Câãe:äæš¥£’]”°ÌuY%¾,Eù/Kó†1Š€žJ‘$Ó ö²_–0Ò÷e¬X'sŸÕÚµkçÍ›7dÈ;;»–––¸¸¸õë×w}VWWW!Dmm­¶E­VK†ô™8q¢Ô2xð`''§»ï¾ûçŸV*•]/ `­dnvuuý׿þUZZzèСÒÒÒ={öôéÓ§ë³zxxøùùi/y«Tª†††ˆˆˆí#„èÝ»·F£iiiézU+&€…­­­jµº±±100°¹¹ÙXQ3999==]¥RÕÔÔ¬X±"&&Fz»ÕÎ;333õ÷ÉÌÌ,..®­­=qâÄ’%K¦L™Òæê1mÈàÒÒÒÛn»m̘1óçÏBìÚµ+!!Á(¯\¹ò3fL@@€B¡Ð>Ä»ÿþ;vèïóÏþóŽ;îèß¿ÿìٳǎ»uëV£”°b2OO›6mĈëÖ­>|ø¹sçjjjn½õÖ’’s•g¼ Àÿá%Xè^¼ ÀK°,ˆµ½ëøñã»wï¶³³“6ÝÝÝ«««»>+f&s ´R©T«ÕÚͲ²2ooo—€ñÉุ¸ÄÄÄŠŠ !Ä¥K—/^pàÀ·ß~ëáá!„ Ûºukxx¸Y À˜dnÖh4ööÿ’ííí5‰KÀød®O˜0á‘GÉÈÈ(//ê©§&L˜`žÊt§øxù>ýsM_`42W€_~ùå+W®öêÕ+00°ºº:##Ã<•`Dú®755}öÙgG-***//6[eè'û&6^Ãté»Ü«W¯¿üå/Bˆààà˜˜Ò/Àré À …ÂÛÛûâÅ‹f«‘y ÖwÜq×]w%&&0@¡PHsçÎ5}a“LÞ¿¿R©Ü¶m›n#`qdp~~¾yêÀ¤dðÉ“'½¼¼´¯¿***ºråÊwÜaú aÈïåæò{¹@÷“ À »wïÖn677'&&þç?ÿ1qU`=dºOðë}f!€KJJ†ªÝ:thqq±‰K‚MãW=˜ˆ¾ŸABxzz–——k7ËÊÊÜÝÝM\Æ'€ãââ.\øÃ?h4𢢢Gy$..Î<•`D2xíÚµ...C† éÕ«×àÁƒûôé³~ýzóT€É<ìêêú¯ý«¬¬¬¤¤$(((00Ðø //¯¾¾^Û˜‘‘aâª02™¼xñâ;vÄÆÆ*•Jó€)Èà­[·æåå 
:Ô<Õ`"2Ï»¹¹yyy™§LG&¯Y³fÙ²ejµÚ<Õ`"2822òèÑ£ýúõsÑa”‰[ZZ–/_îåå¥T*ãã㫪ªn´F£?~¼B¡ÈÏÏ7JI+&€~øáèèèO~Í(oذ!''çðáÃ%%%MMM³gϾÑ>¯¿þº½½Ì3ÌHd¤J¥:qâ„“““Ñ'Þ´iSjjjxx¸"---,,L¥R…„„ا¨¨èþçöîÝ+í@?™+ÀQQQ*•Êè³VWW_¸p!22RÚ uvv>sæŒ}4MBB /¼àááaôÚVI&ÇÆÆÎ˜1ã¯ýë{:º>k]]¢oß¾Ú777©Ñ>¯¿þzŸ>}:¼kºCŠ_[µj•Ô~êÔ)mŸÂÂÂúúzí¦î.ÝÏõõõ………îbc` ² «µîïBž°Š6#¨§}zF0®øÏd×õ„UXýŸ,›Á@=|65‚´ «Á@=|Vÿ_ †e¿¡†¤¤¤6!ÎÀ¹d)4žÝ#GŽlߨõ—NUWW{zz?~<**JjQ*•[¶lyðÁeûDFFþîw¿ûꫯüüü.^¼èëë{úôéë”(2kD¢H’é yÃ,u.>^¾On®éë06Ö¥è/¿.Ëû¾ø²z¾,«ü7†E~Y¶JöŸ@Á—ÕcXë—e³ë²ÊE Ö¥"#Å:™g€Mô‚e??¿S§NIáV¥R544DDDÒç‹/¾¸råŠÔY:ãÇ_²dÉêÕ«MQ*›b­ÿç!€M'999==}Ò¤I^^^+V¬ˆ‰‰‘ÞnµsçΫW¯.Z´¨³>ãÆ“¹|ùò­·Þš““3zôèîZn€!K ¸ô¿A·à•+WªÕê1cÆ466ÆÆÆæääHíû÷ï/))‘p‡}œÿõööBˆþýûë>* @{Ý€íììÒÒÒÒÒÒÚ´geeÉöÑòññáù^€!dÞ €u0è pUU•î««‡ b²z0 ™|ôèÑùóç—••é6r×1ÀâÈܽxñâuëÖÕÔÔ4ê0Oe‘ü-ÐóæÍ3CøC~.(—Ÿ € sØ××÷òåËæ)Ó‘¹³hÑ"OOOmãܹsM\F&€?úè£Þ½{gggë6€G&ççç›§LJæ`¬CÇW€³²²¢££Ã²²²ÚïMLL4qUY§ØÍÍ °à'N´ù€Eã`€M l`À›ÐñK°tµ¶¶TVVNž<¹¹¹Y¡PØÙÙ™¡2Àj(’äûhÞ0}€m“¹\ZZzÛm·3fþüùBˆ]»v%$$˜¥0ŒI&/^¼xÊ”)jµº_¿~BˆØØØ#GŽ˜£.ŒJæèãÇïÞ½[{ϳ»»{uuµé«ÀÈd®+•JµZ­Ý,++óöö6qIŸLŽ‹‹KLL¬¨¨B\ºtiñâÅ3gÎ4Ka“L^¿~½]@@@aa¡OïÞ½ŸþyóT€É<ìêêš››[QQQ\\ø //¯¾¾^Û˜‘‘aâª02™¼xñâ;vÄÆÆ*•Jó€)Èà­[·æåå :Ô<Õ`"2ovssóòò2O)˜ŽL^³fͲeËÔjµyªÀDdpddäÑ£Gûõëç¢Ã<•`D2Ï?üðÃÑÑÑo¼ñ/ÁX4™¬R©Nœ8áäädžj0™[ £¢¢T*•yJÀtd®ÇÆÆÎ˜1#))iÀ€Úƹs皸*tE’|ͦ¯L@&ïܹ³OŸ>ï¿ÿ¾n#ÜÙLE ˜LÎÏÏ7O˜”Ì3ÀX‡Ž¯geeEGG‡……eeeµß›˜˜hâª0²N°››`5:À'Nœhó‹&ó ð¬Y³Ú´ÄÆÆš¬LE&Ÿ>}ºMK^^žÉŠÀT:ý¤ììl!D}}½ôA¢R©|||L_FÖi~íµ×„µµµÒ!ÄM7ÝäëëûÖ[o™©4Œ§Ó,ÝêüÔSOedd˜±LBæ`ӥߖ––åË—{yy)•Êøøøªª*Ãû¬]»öæ›ovrròôôŒ/**2Q‘«!€MgÆ 999‡.))ijjš={¶á}bcc÷îÝûã?æåå¹¹¹Íœ9Ó¼µ,O§·@›Ú¦M›RSSÃÃÃ…iiiaaa*•*$$Ä>QQQRWW×€€€;v˜¿~€eéž+ÀÕÕÕ.\ˆŒŒ”6CCCÏœ9cxŸœœ—^xaõêÕú§SüÚªU«¤öS§NiûÖ××k7uwé~®¯¯/,,ìp—ÑG躞° Ý]ÆÕ]«h3BW–Ð^Oûg²ëzÂ*ø“uCzÈŸ,ݺ®'¬ÂêÿduýÛ´ Uü†o³‡¯â·ý{²‡¯¢³ô° Uü¶OöðUð'Ë"Vñþd‰_ÿ7³5$%%µ qÎ%K¡Ñhd;UUUé=dÈ.ÎZ\\üÝwß…††J-~~~«W¯NLL4°Occ£Z­.--}çwf̘1qâÄÎæR( ZcÏ/ÛEÑ?W¶æ ccDÆX—U.J°.³‘[—U.JXëºrsI2],oQúeÀò¾,È.J°®Ã*%lx]V¹(ÁºôOd¤X's ôÑ£GçÏŸ_VV¦ÛØõ‰]]]…µµµÚµZ-5ØÇÉÉÉÉÉÉ×××ÃÃcäÈ‘eee]¬ `Ådn^¼xñºuëjjjut}V???í%o•JÕÐÐq£}„¦¡¡¡²²²ëU¬˜ü3ÀóæÍëׯŸ££Lœœœœžž®R©jjjV¬X#½kçΙ™™zú´¶¶®_¿þìÙ³µµµùùùIIIAAAÆ 3JUk%€}}}/_¾lЉW®\ùÀŒ3& 
@¡PäääHíû÷ï×¾Õ¹³>yyy&Lðòòº÷Þ{ ðÉ'ŸØÛwÛë¬A&7FFFÆÄÄ,Z´ÈÓÓSÛ8wîÜ®Olgg—–––––Ö¦=++KŸ›nº)7Wþ!è’ À}ôQïÞ½³³³u€0'™œŸŸož:0)ù—` !*++¿úê+Þ´ °\2øÊ•+÷ÜsÏ€F=`À€{ï½·¦¦Æ<•`D2xéÒ¥­­­ß~ûmKKËÒ¥KÍSF$ó ð¾ýö[!DXXØÖ­[ÃÃÃÍRÆ$sX£ÑèþÄ®½½½F£1qIŸLž0aÂ#}ºMK^^žÉŠÀT:} tvv¶¢¾¾^ú Q©T>>>¦¯ #ë4¿öÚkBˆÚÚZéƒ⦛nòõõ}ë­·ÌTÆÓi–nu~ê©§222ÌX&Ñi–¬ZµJ­V·itss3Y=˜„Lîׯ_ûFFcšb0™\^^®ý|åÊ•´´´;î¸ÃÄ%`|2Øßß_÷svvvLLÌ’%KL\F&ó;ÀmØÛÛ×ÖÖš¨LGæ ð'Ÿ|¢ý|íÚµ½{÷z{{›¸$ŒO&/X°@û¹OŸ>·ß~{vv¶I ÀdpEE…yêÀ¤dž>yòdQQ‘v³¨¨è«¯¾2qIŸLNHHhnnÖn677'&&š¸$ŒO&—”” :T»9tèÐââb—€ñÉ`OOÏòòrífYY™»»»‰KÀødp\\ÜÂ… øáFSTTôÈ#ÄÅÅ™§2ŒH&¯]»ÖÅÅeÈ!½zõ}Ö¯_ožÊ0"™ŸAruuý׿þUVVVRRhž²0.™+ÀBˆÖÖVµZÝØØØÜÜÜÒÒb†²0.™\ZZzÛm·3fþüùBˆ]»v%$$˜¥0ŒI&/^¼xÊ”)jµº_¿~BˆØØØ#GŽ˜£.ŒJæàãÇïÞ½ÛÎÎNÚtww¯®®6}U™Ì`¥R©V«µ›eeeÞÞÞ&. ã“ÿàÄÄÄŠŠ !Ä¥K—/^>>'NœˆŠŠºýöÛ5qKKËòå˽¼¼”Je|||UU•á}žyæ™#F8;;ûûû?öØcuuuƪ `­dð»ï¾;zôè9sæ”––Ö××GFF~ûí·F™xÆ 999‡.))ijjš={¶á}.]º”‘‘QTT”››{èС'žxÂ(%¬˜Ì-Ð)))»wïž4i’bß¾}ëÖ­»óÎ;rÅuÓ¦M©©©áááBˆ´´´°°0•JbHŸÍ›7K|||/^œ‘‘ÑõzÖMæ p~~¾”~…7ÝtÓŸÿüç={öt}Öêêê .h_(êìì|æÌ™í#„øâ‹/FÕõ’Ö­ÓüÉ'Ÿ´¶¶úúú !jjjZ[[…---eee]ŸUº†Ü·o_m‹››[› ˆôùÇ?þqèС´´4ýÓ)~mÕªURû©S§´} ëëëµ›º»t?×××v¸Ëè#t]OX…î.ãê®U´¡+Kh¯§ý3Ùu=aüɺ!=äO–î]×Vaõ²ºþmZÐ*~÷ÙÃWñÛþ=ÙÃWÑÙzXÐ*~Û¿'{ø*ø“e«ø ²Ä¯ÿ›ÙÀ’’’Ú„8ç’¥Ðh4ïP(…žžžùùùþþþ×®]srrêìÃUWW{zz?~<**JjQ*•[¶lyðÁ ïóÊ+¯üõ¯=pàÀðáÃõ­PÑé{®øxÙ.Šþ¹²}4o£#2ƺ¬rQ‚u™Üº¬rQÂZו›«H’éby‹²Ð/Ë–÷e@vQ‚uõV¹(aÃë²ÊE Ö¥"#Å:ù·@›‚‡‡‡ŸŸŸ6ñ«Tª†††ˆˆÃû¬Y³&==ýÈ‘#úÓ/’î ÀBˆäääôôt•JUSS³bÅŠ˜˜é X;wîÌÌÌÔß'%%å­·Þ:xð`@@Àµk×~ùå—îZÀRȼÚtV®\©V«ÇŒÓØØ›““#µïß¿¿¤¤dÑ¢Eõùé§ŸÒÓÓ…Æ “qpp¸víZ7­:bÀ®"WþNW‘¾¼|ùr;;;!ÄÏ?ÿ¼jÕ*—––cMlgg—––ÖþýUYYYúû¸¸¸XÞ3½€îÖižsÖÀvð²_³Ô+×ïh„ IDATm/ÁÀœÀ›@Ø„nû$z~³ ›A@†"I¾æ Ó׺†[ 6 ° `€M l`x 4t^, `f\Ø0À&€6g€‹—ï“›kú:~ ®l`À›@Ø0À&€6 ° `€M l‚}wòI24o˜¥X2®lW€Ë/ß'7×ôu‰+À›@Ø0À&ð 0¬”ìã²<+ Ø®l`À›@Ø0À&€6 ° `€M l`À›`ßݬŠ"I¾æ Ó×ÐØæÅÇË÷ÉÍ5}`ZÜ ° Ý€[ZZ–/_îåå¥T*ãã㫪ª ï³uëÖ1cƸ¸¸ØÛs`n 6lÈÉÉ9|ø°··÷‚ fÏþì½{xTå¹þÿ„$dÈ’˜ƒ –€ÐR`›*•*A+JÄ"… Zë—VB•ƒŠ9(ZÙbå´ao°‚õD`£Í&äD ‰eÂûûc]_v¤ÌZï¬÷žûó‡×ÌJ®ù¬'fî‡'ï;kþôÓO¯ó{ÂÃçL™R^^þôÓO{ãÜ ±/üø%!„B!WÃkð²eËfΜ™œœ,„X°`A×®]óóó;wî|=ßs÷Ýw !>üðC¯œ¹™ŠB!„€wàŠŠŠ3gÎôîÝÛxÚ¥K—   
C‡5€¯ç{”‹EB!„BˆÎxç3ÀçÏŸB„††º„……ÿ­ï¹Nÿ—?ýéOÆñ¯¾úÊý=ÇŽ»pá‚ûiã/5~|m®ç.\¸pìØ±+~©É9xÎužƒ¹\ã'i.×þIzþ ×ùóß<ëŸà:ÏÁ\Ñ5~žcú;ëßÃw–)xþκ~”ý$}öeJÊ™û ×@£*nàÿ¦Í«¸±œ´y7Ðq4ªâÆrÒæUð¥Eʦ¤'žx¢Éw®ŸÄ!¥4ëµ®ŸŠŠŠˆˆˆ/¿ü²OŸ>ƧÓù׿þõ¸þïùðÇêr¹®ír8LªQå ðu¸mÚu]Û’us]ï^ëŸtñ–F®r¾Y¿˜?@T—Y¿¶*ÊD—Í0í]l'P?"YdQ‡ë‚,Jذ•Ø©m™5Öyg8<<<66Ö=ñççç_¼x±{÷îÿî÷B!„B!׉×.‚5qâÄW^yeàÀ‘‘‘Ï<óLjjªñáÞuëÖ;wîñÇ¿Æ÷444\ºtéÒ¥KBˆúúz!D`` · !„B!„èê:¹^»ðsÏ=7|øð~ýúÅÇÇ;Ž5kÖÇ·mÛ¶víÚkÏ_ÿú×–-[>¼¡¡¡eË–-[¶,//÷N„B!„B4Ák+ÀÍ›7_°`Á‚ š_±bÅO~ÏøñãÇoù)B!„BÂk+À„B!„BˆJ8B!„Bñ 8B!„Bñ ¼ö`B!„B±Šë¸‡­¸Ž{Ø08B!„BˆpØÖn&„B!„âp&„B!„âp&„B!„âp&„B!„âp&„B!„âð*ЄB!„\×s±ß&]ìWå……mU—Y"B®W€ !„B!„ø€ !„B!„øÜM|Ç?ý=r¹õçA!„ø0?ÙŽÙ‹íÿíD0àL!„B4ç'?VjÖge¯Š„ØÀ„B!^ÅV× ¯«ÄkB”ÃØLø?B!v3•Õ"ÁEEBÑ^‹B!„BˆOÀ˜B!„BˆOÀ-ЄB‚&„r58B!> ?VJ!Ä×àhB!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`B!„B!>`BˆÝùÓŸþäíS v„¿äŠðC#ø?‹\þbKáL±;þóŸ½} ÄŽðƒ\þbhÿg‘+Â_ b)€ !„B!„øZÀ 3f̈ŒŒt:Æ ;{ö¬·ÏˆB!„BˆÝÑr~饗֬Yóé§ŸþóŸÿ=z´·ÏˆB!„BˆÝñóö ÜË–-›9sfrr²bÁ‚]»vÍÏÏïܹ³·Ï‹B!„Bˆ‘ºQ^^.„øòË/ÝG‚‚‚Ö¬Ysµï÷ö˜B!„Bˆ§˜2Nê·|þüy!Dhh¨ûHXX˜qðŠp&„B!„"tü pHHˆ¢ººÚ}¤ªªÊ8H!„B!„\ ýàðððØØØ¯¾úÊxšŸŸñâÅîÝ»{÷¬!„B!„؇Ž;„333ßyç-[¶DFF¦§§WWWöÙgÞ>)B!„B!¶F¿Ï !ž{ªª~ýúÕÕÕýú׿^³f·ÏˆB!„BˆÝÑr˜B!„BùwÑï3À„B!„BÈ À˜B!„BˆOÀØ#>|ìØ1ãqAAÁñãÇé²³KQTT´sç΢¢"K-tYÁÔ©SÁD0.eïbÔh¢K‘b—j¼+sÁ÷,T@Ϣ˾Hâ}úôùøã¥”7nô÷÷ \¼x1]ötUWW§¥¥ !š7o.„HKK«®®¶BD—)ìÞ½{È!½{÷¾í_4»”½‹!£‰.DŠ]¨ñ®ÌÙ³P]=‹.Û‹`yDXXXIII``àÀÓÓÓ{öì9xðà‚‚ºlèzê©§òóó—-[vóÍ7Ÿ®ÒÒÒÑ£Gúé§~~~.—kÀ€|ðAdd¤Y¯O—¹ôë×o÷îÝ—/_îСéS§¤”!!!.\ÐWìRö.†Œ&º4)v¡Æ»2dÏBuAö,ºl /‚åãÇ¿âñ®]»^íKtyË5uêÔÖ­[Ÿ:uª]»vß}÷Ý”)S¦Núþû ËDBBB***ÂÃçùùùaaaZ‹€]ÊÞÅÑD—F"Å.ÔxWæ‚ìY¨.ÈžE—}ñÚÚ3!jiÛ¶­q¡vƒsçÎEEEÑe[×ïÿûnݺ-\¸0<<üÕW_íØ±ãôéÓµ»!V€ïÊ\= ÕÅžETÂ-ÐÄWˆŽŽÎÉÉiÓ¦ñ´¢¢"99¹¸¸˜.{ºÜ»kGDDÄwÞ9yòd}EÀ.Bˆ Æ»2dÏBu±g•pöˆ”””[B—ç>|800°S§N7ÝtSAAÁñãÇo¹å+Dt™׎4r5AYb`D]úЬv¡Æ»2dÏBuùBÏ¢ËFxù"\šS׈sçνûî»ÉÉÉtÙÓÕ§OŸ?þXJ¹qãFÿÀÀÀÅ‹[!¢Ë JKKˆ$Br){CF]‰»Pã]™Ëzª £gÑe[8{D“¿&ÄÇÇöÙgtÙÓZWW'¥0`ÀªU«rrr¬ÑeݺuÁ¸”½‹!£‰.DŠ]¨ñ®Ìå#= 
Õгè²-ÜíÇw?ŽŠŠ¢Ë¶®fÍšþðÃ{öìù¯ÿú¯ÐÐÐsçÎÑe[W}}}ãÇÿó?ÿãp8´»”½‹!£‰.DŠ]¨ñ®ÌÙ³P]=‹.ÛÂØ#:vìH—.®„„„7–••%''‡††ÖÖÖÐe[WË–-?ï½÷´»ŒwqQQQaa¡u¿*Eð.5(sA%pã]™ ²g¡º {]öÅÛKЄ(bóæÍþþþ›6m’RîÝ»wÚ´itÙÖu¼%%%YTŠ€]ÕÕÕiiiBˆæÍ› !ÒÒÒª««µ»ˆF Æ»2dÏBuAö,ºl `âCÔÖÖ*{ÒE|‡É“'8ðĉ—/_>qâÄwÜ1eÊ­EÀ.¢¨ñ®ÌÅžE~ j¼£º¬€÷&„âíÚµÛ½{w||¼ñôÔ©S¿úÕ¯ ô»!ÄÇAwT—pöˆšššºººÈÈHã©”²´´Ô¢‚ÓE±'ÁÁÁçÏŸw_°DJráÂ}EÀ.Èx‡,Šb¨ñŽê²‚fÞ>½™1cÆË/¿l<®¬¬¼õÖ[£££;F— ]„+HHHh|AÈãÇ·oß^k° 2Þ!‹"„Xj¼£º¬€°G|öÙg=ôñxáÂ…¡¡¡_}õUÿþýŸyæºlè"QSSSVVæ~*¥<{ö¬Ö"`×Ûo¿íç÷ÿßSÀÏÏïí·ßÖZ삌wÈ¢ˆ^ Æ;d/FwT—%¨ú°1&-[¶<þ¼ñ¸W¯^kÖ¬‘REDDÐe7×… JKKÝO/_¾lÝeéòœ‰'º/×YQQÑ­[7!ÄÍ7ßœŸŸ¯©ØepòäÉÏ?ÿüäÉ“½¾z¤ 2Þ!‹’¸ñ®ÌÙ³P]À=‹.ÂØ#BBBªªª¤”uuuÇ7Òe7dÃv%%%íß¿ßxü /ôíÛ÷«¯¾zä‘GÒÒÒ4»Îž={Çw!Œ¿0 ñ?:u» ã²(‰ï3j¼CöbÔxGuY`èÝ»÷Ò¥K¥”ï¼óN\\œq0'''11‘.»¹ °‹kG¹|ðÁáÇŸ:uJJyêÔ©´´´‡zÈt‹J° 2Þ!‹’¸ñ9S¡Æ;d/FwT—pöˆ÷ß¿Y³f7ß|s³fͲ²²Œƒ¯½öÚ¸qãè²› ²a»¸v¤‘«mÛ¶†ËàܹsQQQ¦[TŠ€]ñY”ÄwÈ™ 5Þ!{1j¼£º¬€°§üãÿÈÌÌüè£è²¹ ²a»¸v¤‘+**ª¢¢Âý´¼¼<::Út‹J°K"Æ»J‘Jj¼CÎT¨ñÙ‹QãÕe€‰¯Ù0€]\;ÒÈ5f̘{î¹'''§®®î›o¾¹ë®»}ôQÓ-*EÀ.¢¨ñ9S¡Æ;d/FwT—8¤”Ö_j–»ï¾ûj_Úºu+]¶rýío3fLBBBaaá¢E‹&Ož,„X¸pá‘#GL¿t;]¦ðé§Ÿfgg÷ìÙóž{î1ýŽ"Bu•——3Æý†‚‚‚.^¼h<®­­ ¢Ën.ȆìJJJ2üx™ES°ë‰'žxñÅ;uêôßÿýߣFjÖ¬ÙüùóŸzê)}EÀ.7Hñ®^¤Æ…ï3j¼CöbÔxGuYW€M#::ú•W^yðÁçÍ›·fÍšÐe+ê9T—®ÙßVRR8pàÀôôôž={<¸  À\‹J°«10ñî‘j¼Cîwïê]H=‹.ûbε´ˆ”YYYBˆæÍ›ûùù­_¿ž.ÛºŠ‹‹Ý7+»té’¥ïº<§ºº:88Øx\UU¤»ÏÕºuk)e}}½Óé4n:jºE¥ØÕ¼xW)RéBwe.Èž…êBêYtÙn6É“'÷ïß?77·G;w¦Ë¶.°=rð®   ÷ßÿÁ\ºt©ñ­Ex®„„„7–••%''‡††ÖÖÖ˜nQ)v5/ÞUŠTºPãroÊ¡C‡Ôì‘£‹Bˆç Æ»2{!Ä€0!„B!„Ÿ ™·O€Ø‘ššH!„<س!„\?€=bÅŠ Mnݺõý÷ß×Úœ‘‘Ñäà¸qãž}öY­]„âã@¶-ö,B!×`˜0a‚qC¹ÆTWW/Y²Dk—bݺusæÌi|䡇úûßÿ®»‹/‚ºNUSSSVVæ~*¥<{ö¬Ö"`jÛbÏ"Ä ”µÈžE—máì)[¶lùðÿR^^~èÐ!Ý]Û¶m[¾|¹qÕDƒäääo¿ýVw¨3•²µ#î­ðœ3f¼üòËÆãÊÊÊ[o½5:::11ñرcšŠ€]´m±giäL¹·B(l%=‹.ûâ­ËOc „h~twÕÕÕåççGGG/Y²Ä8¸oß¾ØØX}]o½õ–ËåjrpË–-«V­2W¤Ø%„˜>}z“ƒééé3gÎÔÝUWW×äàêÕ«ûöí«©Èpµk×.33³ñÁíÛ·'''kíJJJÚ¿¿ñø…^èÛ·ïW_}õÈ#¤¥¥i*vA¶-Èž%Ù¶ôIå­¯mAö,ºl `¸b!¹Ž9ùÀ¼þúë]ºt™4i’¾.6'í\6lØüÉÊÊr:šŠ Wnnn||üâŋ݋‹‹[¶l©µ«eË–çÏŸ7÷êÕkÍš5RÊ¢¢¢ˆˆMEÀ.ȶÙ³$Û–>"©¼•àµ-ÈžE—mñ³f]™èM@@€ÃáB$''ïÛ·oúôé 
.ìß¿¿{·ƒ¦®-[¶´hÑ¢ñK7þ)smÛ¶mРA¡¡¡O=õ”qÄÒ½Ê\#GŽ´âe½(B$$$ìØ±#55µyóæ“&MBµiÓFkW‹-._¾,„¨¯¯?räHÏž=…¦ï?T&vA‚Ú³Û–>"¡¶•@¶-¼žE—}ñö®7eee.HâÆ?‰»÷O(_;Ré[§êÝ»÷Ò¥K¥”ï¼óN\\œq0'''11QS°‹mK#ضtIнRa+ìYtÙÀqáÂH$lNšº¬Fåÿ¬€€€úúzãqAAÁðáÃ;tè0f̘êêj­]ï¿ÿ~³fÍn¾ùæfÍšeee_{íµqãÆi*v±miÛ–."‰û?KY+ìYtÙ‡”RÍR3$‡cúôé ,h|pܸqmÛ¶5}7”J× AƒFŒñøã !>ùä“uëÖ¹¿´lÙ2M]‡£®®.00ÐÄ×´ƒ+00°ºº: @QXX8}úôôïß?+++$$D_WyyyDD„¹¯é]6Ÿ~úivvvÏž=ï¹ç ª ²mAö,Á¶¥H¨m%l[žïÀ.óñö®7ôúC'Ož4ïÚµ«mÛ¶“'Ož8q¢¿0Ê\Üø§ÊÖŽ¸HE| ȶÙ³$Û–Vpo!Á`p8¹¹¹ƒ zæ™gÜ×W())¹ùæ›/^¼¨¯+  ´´444Tñí·ß¦¦¦ž>}º¾¾Þ¸Ÿ¦®šššV­Z™ø‚6q¡¢líˆ{+<çî»ï¾Ú—¶nݪ£ØÙ¶ {–`ÛÒ È½Ba+ìYtÙ–fÞ>í1®Y7gΜ¥K—G¬¾¦«×M7Ý”››k<ÎÍÍmÛ¶­é õ.•÷XWé4hЛo¾i<þä“O&6Bk—bݺusæÌi|䡇úûßÿ®¯èÀ¿þõ¯Ç7nôóós8Ë—/×Ú•ô/Úµk·sçNãqçηmÛ¦©Ø%ÛdÏl[úˆ ”µ•.e­²gÑe_¼ºþ¬=ôúCS¦LéÑ£Ç_|±cÇŽN:½ð Rʺº:+~a”¹âÆ?‰»÷O Þ{Ðßß¿ªªÊx|âÄ ãJ¤½³TºÜGEE/]ºdK™ÏÙ¶ {–dÛÒG$qo󮬕`÷,ºì†gi[P¯éZYYyÏ=÷8‡Ã1bĈšš)¥Ëåzíµ×ôu±9éåˆ÷Éhß¾ý—_~i<Þ¼ys=¤e?@•.7ÕÕÕÁÁÁÆãªªª   ÝEx.ȶÙ³$Û–>"‰{KBe­»gÑe78“«R]]­ìB \lNz¹”­qo…¹DEE­Zµª¡¡á¥—^JII»ÀëY’mK‘Ý[!¶øžE—­àlååå x.)e]]]yyù¥K—\lNz¹ ï=ˆºNÕ˜¬¬,!DóæÍýüüÖ¯_ ‚t¡¶-¤ž%Ù¶ôIнRa+ïYtÙ À‘——×¹sg!D‡Nœ8±råÊ®]»¦¥¥kí’R¾ñÆ]»vu8Bˆ-ZÜyç{öì±B¤ÌÅæ¤— ¼uª&ÈË˃!¹PÛ^Ï’l[úˆàQÖJ°{]ö·Aòˆûî»/,,ìÉ'Ÿ\¼xñ÷ß/„1bÄêÕ«cbbÖ®]«¯Ë¸bçøñã…+V¬5jÔ?üðî»ïnݺµÿþúº€9þ|³fÍÔÜÄB¥«¢¢¢uëÖÍšY~½ze"ƒúúúÚÚÚÐÐP???$±?m‹=KG”µÈž¥Ø%¶ö,¢oOàzÓ¦M›ÊÊJ)eii©âÌ™3RÊ3g΄‡‡k튉‰9|ø°ñøë¯¿¾õÖ[¥”¯¾újÿþýµvIÜnµ#î­ >dÛîY’mKêÞ ©°•°gepöˆV­ZýðÃò_)1v×ÔÔÔ¸¯‡¦»KJyñâÅ)å©S§õu±9éåº÷Þ{~øáììì|055555uñâÅýúõ9r¤¦")efffllì¬Y³fÍš;mÚ´É“';Î;wjí"Ù¶ {–dÛÒJ¤²•@¶-ö,¢ÀѳgÏ¥K—J)/^ܾ}û§Ÿ~://ïé§Ÿ¾ýöÛµvõíÛwîܹRÊË—/Ï™3ç—¿ü¥”òÔ©S­[·Ö×Åæ¤—KÙÚ÷VŸ²mAö,ɶ¥H‚î­ [ {Q `ذaƒŸŸŸÓé ;zôhŸ>}„‘‘‘{÷îÕÚµ{÷ˆˆˆˆàà`£UlÛ¶mìØ±úºØœôr©_;âÞ Ò•bQé‚l[=K²rËöK IDATmé#’ {+¤ÂVžå9¨mË 8{ʱcÇÖ­[g´ŠË—/Y÷!•®3gά\¹råÊ•§OŸ¶H¡ØÅ椗KÙÚ÷VxÎ[o½år¹šܲe˪U«4I)…Ó§Oor0==}æÌ™Z»$hÛÂëY’mK‘Ý[!¶Èž¥ØܶL‡°9¼÷Þ{tÙÜÅæ¤—KÙÚ÷VxŽø×D³zõê¾}ûj*2\íÚµËÌÌl|pûöíÉÉÉZ»Ü Å»z‘Û–." 
º·B*l%=K½ »m™`sPy=mºn 6'½\RáÚ÷VxˆbÆ ›ÿ/YYYN§SS‘áÊÍÍ_¼x±û`qqqË–-µv5–Z÷âÞrŶ¥‹Èro…TØJðz–zvÛ2ÀæÖtQ]lNz¹ ¸vd—¢ùUÐT$ÿõgûüüüèèè%K–÷íÛ«µ«±Ôº÷– ¯(¶-]DnÀâ]½ ©g©wa·-álxMØÙ0€]ü§³ý]WÜ⥵¨±ëÈ‘#‘‘‘<ðÀ믿ޥK—I“&iíj,µîŽå‚,JâÆ;ÒLåõ—é]ì•V¢Ò…Ú¶LÄ!¥ÄcªªªÂÂÂèÒÂåp¨ûµ§K#dQj\‡£®®.00ÐR‹J‘"00°ºº: @QXX8}úôôïß?+++$$D_—Èx‡,JÀ%†zdQ¨.¤ž¥Øß¶L„0ñ9 ]‰ð\ååå–*‹±`‰¡ÞYª ©g)v‘ëÇÏÛ' 7)))×øêÁƒ鲕‹èȹsçÀDx.5ÛV)2Þ!‹"šïê]H=K±‹\?€=bذaË—/7n\BBBaaáÛo¿=qâÄÄÄDºìé2€lÀ.e› •‰ð\ÁÁÁÓ§O_°`AãƒãÆkÛ¶íË/¿¬£H1hР#F<þøãBˆO>ùdݺuî/-[¶L_d¼CÕÔxGš©Ü€Å»zRÏRìBm[VÀ-Ðalvwÿ=øë¯¿ž2eÊ®]»è²§‹h׎4r9ŽvíÚ=þøãÏ?ÿ¼ûàŽ;~ÿûß9rDG‘"22rß¾}:tBìÞ½{øðá#GŽlhhX¶l™é}S¥ 2Þ!‹"zï½Xe+aÛ²'\öˆÃ‡ßzë­î§Ý»w?|ø0]ötA6 `׎4r !¶mÛ6hРÐÐЧžzÊ8’œœüí·ßê+:þ|›6mŒÇÑÑÑ-Z´ÈÊʪ¯¯·âoÛ*]ñY”ÀwÈ™ 5Þ!{±PØJTºPÛ–pöˆÄÄÄ+V<ñÄÆÓåË—wìØ‘.{º °kûöí}ô‘ûŸ/ÇŸ2eʬY³ô»„ ;vìHMMmÞ¼ù¤I“„EEEî覛nÊÍÍíÓ§"77·mÛ¶¦+¼â‚ŒwÈ¢n¼CÎT¨ñÙ‹…ÂV¢Ò…Ú¶,ÁÂ[,ù_|ñEppprrò½÷ÞÛ­[·]»vÑeOW¿~ý8à~úÕW_õë×Ï ]¦ìr¹ÜO].Wpp°Ö"`—Pu?@e")å”)SzôèñÅ_ìØ±£S§N/¼ð‚”²®®Îо©ÒïEIÜxWæ‚ìY¨.Èž¥Ø…Ú¶¬@³´3¥¥¥Ë–-ûãÿ¸lÙ²²²2ºlë‚lÀ®””ã“$o¼ñF=´»êëëÇÇïСØ1cª««5I)+++ï¹ç‡Ãáp8FŒQSS#¥t¹\¯½öšÖ.‰ï*E*]¨ñ9S¡Æ;d/VÙJضì `â+@6 `׎4rS]]}áÂ<ÑÔx‡œ©Pã²cöu=ð*Ðaì}ož={Ö¯_¿`Á‚={öÐe×Î;ó›ß´oß>!!¡   ¨¨è£>êׯŸY¯O—锕•mذáôéÓqqq÷ß¿u·’W&v !***Z·nݬY3K-*Eõõõµµµ¡¡¡~~–_5C 2Þ!‹¸ñ®Ì…Ú³P]¨=K±KÀµ-+àì+V¬øñÁñãÇçäädgg?ž.[¹ °‹èB~~~ZZZ~~~‡¶oß¾{÷î¹sçÞrË-Ë–-‹ŽŽÖQd°dÉ’7Þx#77WJÙ¢E‹ÿøÿøË_þò‹_üÂt‘Jd¼Ce€ï¨3Ñ•­„mËžp&„Ø®iäºï¾ûž|òÉÅ‹ÿý÷Bˆ#F¬^½:&&fíÚµ:Š„sæÌYºt©1ϬX±bÔ¨Q?üðûᄏuëÖþýûëë"„Xj¼Cöb•­„mËžè´Z­S§N]¸p!]6tA6 `×ÕRºvíjî‹2°+;;ûĉ­[·îرcÛ¶mÏœ93|øðŸýìgšŠ„K–,Ùºu«qÇ×´´´G}ôðáɉ‰Ï=÷ÜÎ;õuý€x÷¢ÈRj¼CÎT¨ñÙ‹U¶¶-{Â`ÈÎΞ;wnqqñåË—#GMNNBìß¿Ÿ.[¹P÷È¡ºˆFWTTøûû×××·lÙ²¦¦ÆétÖÖÖÞtÓMçÏŸ×QÔØ%„¨««‹ŽŽ®®®þî»ï:wîlÜéASd¼C%pãro<ѯ´¶-[ÁØ#ºté2dȤ¤$‡Ãa™9sæÜ¹s…cÇŽ¥Ën.¢5\;²­ë¶Ûn›0aÂĉ³²²,X0tèÐI“&-]ºôÀŸþ¹Ž"!D¿~ýÒÒÒžyæ)åK/½ôá‡fgg÷Ýw)))•••úº ã²(¢;ñîE@ÏRìBm[–à•kOÃÖäHbb"]¶u5áé§ŸV#¢ëؽ{÷!Cz÷î}Û¿0h*vmذÁÏÏÏét†……=zÔØv¹wï^MERÊÝ»w‡„„DDDDDDïܹSJ¹mÛ¶±cÇj킌wÈ¢®F¼{ÑгP]=K± µmY?ì½zõjr¤GtÙÓuÅ}k»víªöÈÑõoñØc 2dèС—Yžzê)s-*EÀ®aÆåää:t¨oß¾111ÙÙÙgΜ‰ŽŽ6ý¦ÊDBˆ¾}ûæææîØ±C1`À€ØØX!Ä Aƒ ¤µ 2Þ!‹¸ñ®ÌÙ³P]=K± µmY·@_uª«uëÖçÎk|¤cÇŽ'Nœ0×¢Rìr³jÕª‡~ØR…b°‹ØÔx‡Üï½Ø j¼£ºÌÄÛKЄ(uªkàÀMŽŒ1Bk°Ë²ž¢²y¡ºˆýAwȽñ¨ñÙ‹Ý 
Æ;ªËDšyoô&D)¨{äP]Û·oordݺuZ‹€]„+@wȽñ¨ñÙ‹ áhB!æàp(ê)ÊDÀ.BñqPãÕe"\&„bM>Á vBˆƒï¨.Ñrj'„B!„Bþ]x$B!‘’’r¯øàøCFF†ES7¤KeQÄÇámˆ±eË–&ó@yyù¡C‡è²§käÈ‘V¼¬E¨®>}úüøàž={Ö¯_¿`Á‚={öh'v8p ´´tÔ¨QV,WzËYTcbcc7mÚôÁ 0Àê‘Ò¥@TRRRVV&¥,))BÜqÇ›7ož:uêúõë­Ø†€çRYj¼£º¬€°G”••©Y&¥Ë g`WMMšß e"T×øñã¯x¼k×®Wû’ÍEÀ®”””êêê)S¦¤¤¤˜ûÊ^tA%@ç•.•EÝtÓMMX¤KeQ¨ñŽê²n&¾ä¾nº4»!V€ºµroüÁƒ¯¶5Àô¿•@ºTEW€=büøñwÝu—š51ºˆO¡lkê~•®3uêÔ… "‰`\ñY”ÀÝÚ ¹7uä>Žƒï¾ãòÀQXXxîÜ9º´p¡Î¨.e—TyÝBTWvvöܹs‹‹‹/_¾l9zôè®]»„û÷ï×Q삌wÈ¢蜣ҥx¦ ýóŸÿ¬@„êR&BwT—p 4!ÄŽpíH#W—.]† ’””äÞp8sæÌ¹sç !ÆŽ«£ØE±Ôx‡ìŨñŽê²µ·†¥¢¢¢   ¢¢‚.Ûº{ì±µk×Zôât™Î€–/_Ž$v………59’˜˜¨µØå)ÞÕ‹Ô¸Pã]™ ²g¡º {]¶¥™·p½ihhÈÌÌŒïСCxxx||üœ9sè²› uªkÇŽ?þ8’ØÕ«W¯&Gzô衵ØïE Üx‡Üï½5ÞQ]VÀ-Ð1cÆŒõë×gddôìÙ3,,¬ªªjÿþýóçÏ=zô¼yóè²›‹èHeeåùóçCBBÚ´iƒ!vûïEMAwÈ^L|o/AëMxxx^^^“ƒGމˆˆ Ë†.°=r¨.—Ë5{ö츸8wXÅÅÅeffº\.MEÀ.¢ñYTcÀâ]½ ©g¡ºØ³ˆJ8{DPPÐó´´´4((ˆ.»¹ °+###!!!+++;;;''';;{Ñ¢Eñññ3fÌÐTì"ïEIÜx‡œ©Pã²ÂØ#ÒÒÒ† ÒøÁ999ƒ:t(]vsA6 `׎4r€ŒwÈ¢$n¼CÎT¨ñÙ‹ áì%%%©©©B§Óãt:…©©©%%%tÙÍÙ0€]\;ÒÈE42Þ!‹’¸ñ9S¡Æ;d/&„Á2¼¼¼C‡ÙïÞ½{RR]6t9΢¢¢&×T(++KHH¨­­¥Ën.c9eÞ¼y;w6ŽäææfddøûûoܸQG°‹hX¼+)s¡Æ»2dÏBu±g¥xmôÆâ½÷Þ£Ëæ.Ô=r¨.®iä":‚ïêEj\¨ñ¹75Þ!{1!\6‡CÝO’®ãìÙ³£FúüóÏNghhhuuummmjjêš5k¢¢¢è²›Ë€kG¹ˆ^ Å»z‘j¼+s÷,T{Q`skºÀ.ȆìB¬ZµêᇶT¡Xì"ï*E*]¨ñŽ:S¡Æ;X/¾xñ↠úõëסCºìï2ÕKΠ¨üIÒå!`{äà]Ê~1 ÛÕ¸Þzë­ßVdË–-«V­ÒTìröK¨X¤Ø…ï{ãQ ÁÞÅEEEþþþ111YYY—/_¦Ëæ.Ói¦~ä†äܹstéâ3fŒ]ÄG˜0aÂ¥K—𬮮^²d‰¦"`—Èx‡,JàÆ»2{ù1ÁÁÁûöíÛ¶mÛwÞyòäIºlî2?oŸaaatiä"„˜Ë–-[Z´hÑøHyyù¡C‡ô» ã²(BˆEÄÆÆnÚ´éƒ>0`@FFÆ“O>ép8è²­ËDø`8}úôþð‡ÿýßÿ½páBãƒtÙÓeù!1`WUU•šh*á¹GóæÍ¯ø%—Ë¥£ØïE55Þ!?ïê] D%%%ÅÅÅüæ›oŒ#åååS§NmhhøôÓOé²›Ë 8{ÄwÞ™˜˜8dÈ–-[º8.{º °‹Ø‡ÃQWW#vAÆ;dQAw¤™ŠhÄ5–(M‹è²'ÜígÏžýÇ?þA—..Ô=r`.®iä"ïE5,ÞÕ»zªKeQ(--5j”‚µJºì ` q¹\~~*~ŒtydÃv=òÈ#‰‰‰óæÍk¼Ì¢µØUVV¦fõR™ØïE Üx‡œ©Pã²§¤¤TWWO™2%%%….û»¬€[ =âõ×_ß¿FFFxx¸û`\\]6t¡î‘CuuëÖÍýÁKQ&v€ŒwÈ¢n¼CîGwÈ^Lïì*¤tyH×®]­xYº,¢OŸ>—.]B»{ì±µk×"‰€]ñY”Äwe.Èž…ê‚ìYtÙnöÔ[BºP÷È¡ºFžž®`™E™ØUXX¨æ¬L삌wÈ¢n¼CîGwÈ^Œï¨.+àhâ+ î‘Cu]ñƒVä•2°‹b¨ñ¹75Þ!{1!€=¢OŸ>×øêž={è² ²a»ªªª~|Њ+y*»ÜTVVž?>$$¤M›6"<d¼C%pãr¦BwÈ^ì,Þá]&Â-Ð1~üxºtq¡î‘Cu!ÝÞÕÐÐðÒK/-_¾Ü}yÕ¸¸¸‰'>ûì³Í›7×Q삌wÈ¢n¼CîGwÈ^Œï¨.+à 0!ÄŽpíH#׌3Ö¯_Ÿ‘‘ѳgϰ°°ªªªýû÷ÏŸ?ôèÑóæÍÓQì"„Xj¼CöbÔxGuY`â+@6 
`׊+®ñUa”‰€]»wïîܹsãƒG½ãŽ;ÊÊÊt»ˆF Æ;äL…ï½5ÞQ]VÀ-Ð7µoú|ðàAºlå2@Ý#G—."`W]]]ddd“ƒQQQ/^ÔT„炌wÈ¢ƒšŒwº¼(pñﲮ߫V­2œ¸cǺìé"ö‡kG¹Üœ={vÔ¨QŸþ¹Óé ­®®®­­MMM]³fMTT”Ž"`d¼CE´5Þ!{±ÔxGuY·@{ÄáÇ¿u###ÏŸ?O—­\ Ø•‘‘a<¸â2‹Ž"`—›¨¨¨Ï>û,//ïСCÆíºwïž””¤¯Ø…ïêEj\¨ñ9S¡Æ;d/vƒï¨.+à쉉‰K—.}ê©§Œ§Û·o·nE®²a»~øaãAÿþýÿþ÷¿»—Yžxâ‰|PG°« IIIû÷ïŸ0a‚¥•"HR¼«©q¡Æ;äL…ク xñŽí2I<à‹/¾NJJº÷Þ{o»í¶ÐÐÐ]»vÑeOW¿~ýöïßï~ZZZ:`À+Dt™Bpp°Ëåj|¤wïÞZ‹€]n”õ•Í ÌïEIÜxWæ‚ìY¨.ìžE—Ýhæ½ÑÿøÿøöÛo§Nš’’2a„'Nôë×.{ºÀöÈÁ»Œe÷S«×Žˆ€]D# ã²(ï{ãQã²¢åÔn:”ŸŸo<>yòä±cÇè²­+%%eñâÅî§üq¯^½è²­‹kG¹Ü(ë)*›˜ 2Þ!‹’¸ñ®ÌÙ³P]Ø=‹.»¡åIÛ‡>}ú|üñÇRÊ7úûû6ŽZºlå‚lÀ.)eiié²eËþøÇ?.[¶¬¬¬ @ì28wJž 2Þ!‹’¸ñŽ:S¡Æ;d/6‹wx—‰ð6HVRR8pàÀôôôž={<¸  €.{ºÊÊÊ6lØpúô鸸¸ûï¿?"" ]¦pøðáÀÀÀN: ! \.×-·Ü¢µØE42Þ!‹2€Œw•.¼ž…êbÏ"Jñö®7­[·–RÖ××;Ϊª*)ehh(]öt¡î‘CuqíH#WQQј1c’’’b¡µØïEIÜx‡Üï½5ÞQ]VÀØ#zôè±aÆåË—ÿâ¿RÖÔÔ´mÛ–.{º °+44´®®NJ9`À€U«Våää$$$h-vÝqÇãÇß´iÓöFh-vAÆ;dQ7Þ!g*Ôx‡ìŨñŽê²À±y󿀀ÿM›6I)÷îÝ;mÚ4ºìé‚lÀ.®iäêÚµ«E¯ì-° 2Þ!‹’¸ñ9S¡Æ;d/FwT—pö”ÚÚÚêêjºìï‚lÀ.®iäêÓ§Ï¥K—,zq¯ˆ€]1ÞUŠTºPãr¦BwÈ^Œï¨.+ðóögµ'((ˆ.-\ 7n,++KNN ­­­  Ë¶®_|qĈRÊõë× !¾ù曇~Xk°kôèÑéééáááîƒqqqúŠ€]1ÞUŠTºPã]™ ²g¡º {]öÅ{³7!JAÝ#‡ê’\;ÒÇ¥¬¹¨ìb¨.¢¨ñ¹7^‚Æ»J^Ϣ˶ð6Hć¸xñ¢Ëå ¡K Ñ…ªªª ÓWì"zïÊ\ìYäÇ Æ;ªË 8B!„Bñ ø`B!ѧOŸk|uÏž=Ú‰€]„âã Æ;ªË 8ß8={ö¬¬¬¼Æ7Òe!Ä"Æ&BuAÆ;dQ„뀌w`—p ô“˜˜øÆo\í«>úèÙ³g鲋B|Èx‡,ŠBˆupøÆùÙÏ~v÷Ýw_í«·Ýv]¶ràÚ‘F.¢ñYÑ Ôx‡ìÅ„®²a»¸v¤…+%%å_=xð ç Å"`ÑÔx‡œ©ðâ]± ¬gÑe¸l•••çÏŸ iÓ¦ ]6tUWW/[¶ìj_}ôÑG鲕‹kGZ¸222Œ'Ož\²dIzzzÇŽ‹‹‹ßyçaÆ™¢P,vý˜x÷ŠH 5Þ•¹ {ª ¬gÑ¥Þ½ ±î¸\®Ù³gÇÅŹžqqq™™™.—‹.[¹†z¯ÞsÏ=tÙÊEô¢_¿~û÷ïw?---0`€Ö"`^¼«©t¡Æ»2{¹¨ñŽê²À‘‘‘‘•••“““½hÑ¢øøø3fÐeCÑ‘ŠŠŠ‚‚‚ŠŠ ž+88¸É¿þ{÷î­µØïEM‹wõ.¤žE—máìáááyyyM9r$""‚.º ž‹kG¹RRR/^ì~úñÇ÷êÕËt‹J° 2Þ!‹j X¼«w!õ,TdϢ˶pöˆ   çiiiiPP]vsA6 `׎4r}ñÅÁÁÁIII÷Þ{ïm·Ýºk×.Ó-*EÀ.Èx‡,JâÆ;äL…ï½5ÞQ]VÀØ#ÒÒÒ† ÒøÁ999ƒ¾ö'OèòŠ ²a»¸v¤‘KJYZZºlÙ²?þñË–-+++³B¡X„ꂌwÈ¢$n¼CÎT¨ñÙ‹%h¼»L‡°G”””¤¦¦ !œNgLLŒÓéB¤¦¦–””Ðe7dÃvqíH#סC‡òóóÇ'Ožûì³¼¼¼C‡?>$$¤{÷îIIItÙÓe””déë«Aº6nÜXVV–œœZ[[ µØïE ÜxWæîY¨.°žE—}ñöN!W€kG¹6oÞàïï¿iÓ&)åÞ½{§M›¦µØE±Ôx‡ìŨñŽê²®›Cee¥ñÇÅ6mÚÐeO×øñãïºë®‘#GZñât™׎4rÝ{u••.—+$$DñóŸÿüç?ÿ¹Ö"<Â+ IDAT`—¤xW/RãBwe.Èž…ê‚ìYtÙ–fÞ>½ihhÈÌÌŒïСCxxx||üœ9sè²› ²a»vìØñøã#‰€]Bˆ   £ ˆP]ñY”ÀwÈ™ 
5Þ!{±w`—é8¤”Þ>™1cÆúõë322zöìVUUµÿþùóç=zÞ¼ytÙÍEt„kG¹ˆýŒwÈ¢ˆ¦ Æ;d/&¾‹·÷`ë äã]?¾7 ]¶r¹\®Ù³gÇÅŹÃ*...33Óåri*v€ŒwÈ¢ïê]H= ÕÅžETÂØ# oê‚lÀ®ŒŒŒ„„„¬¬¬ìì윜œìììE‹ÅÇÇϘ1CS°‹hd¼C%qãr¦BwÈ^L`€¼q<ª ²a»¸v¤‘‹hd¼C%qãr¦BwÈ^L?ìgÏž5jÔçŸît:CCC«««kkkSSS׬YE—­\»wïîܹsãƒG½ãŽ;ÊÊÊLÑe N§³¨¨¨ÉG€ÊÊÊjkku¡ºzöìYYYyo(,,ÔKì ñY”Àwe.Èž…êÂëYtÙÀ& òëtÝ0 Øe,§Ì›7Ïýo—ÜÜÜŒŒ ÿ7ê(Bu%&&¾ñÆWûê£>zöìY½DÀ.7`ñ®X¤Ì…ïx3•w•.¼žE—­ñÖÒ3s¡î‘Cu•””¤¦¦ !œNgLLŒÓéB¤¦¦–””h*Bu]ûÿþ=÷Ü£ØÕ¤xW/RãBwȽññ®Ò…׳è²3€=ò¢¨.Ȇì2ÈÍÍ]½zõ›o¾¹zõêÜÜ\‹,*EÀ.¢ñY”Äw¼™Ê j¼CöbâËp ´G Þ{Õ%÷Èa»è½éÒE„炌wȢܠÆ;äÞx—ê]EÑe;¼=ë ä…ø€]`{äP]\;¢Ë‹"`d¼CÕ°xWïBêY¨.È¢è²-€=õÞƒ.Ô\@uAÞ'ƒ.]DÀ.Èx‡,JâÆ;äL…šŒwº¼î²ÀyÑTj. º¸vD—EÀ.Èx‡,JâÆ;äL…šŒwº¼î²ÀyÑTj. º¸vD—EÀ.Èx‡,JâÆ;äL…šŒwº¼î²^Ë /炼É!° ïÞƒti$v€Å»b‘2j¼ó>ÀtyQD—v.KP>râP÷È¡º¸vD—EÀ.¢¨ñ¹751ïtyÝe\¾qºuëvûí··oßþÙgŸ¥Ëþ®³gÏŽ5êóÏ?w:¡¡¡ÕÕÕµµµ©©©kÖ¬‰ŠŠ¢Ën.®ÑåEž 2Þ!‹2@we.àž…ê‚,Š.ÂøÆ9uêÔŸþô§;wž8q‚.û» PsÕE1Èx‡,ª1¨ñ9SB´€0!ÄvpíH#—büøñwÝu×È‘#aDÀ.Bˆé Æ;d/¸ñŽê²‚fÞ>BTЭ[·I“&½üòËtiáúè£êëëW¬X#v ! Ï;‡$v]@we.Èž…êBíYtÙ®Ÿuª‹B|Ôx‡ÜOÑ À„BÌ¡²²Òø ]“ûŽè+vBˆƒï¨.áhB!ÑÐЙ™Þ¡C‡ðððøøø9sæ444h*vBˆƒï¨.+ðóö €P___[[êçgÕôòåËBˆfÍ”þ͵.Bˆ‰<ûì³ëׯöÙg{öìVUUµÿþùóçWWWÏ›7OG°Ë d¼CE1ÔxGuY‚WïBŒÀo¼ÑµkW‡Ã!„hÑ¢ÅwÞ¹gÏ+DC‡={v“ƒ_~ùå‹/¾h…µ.Bˆé„‡‡çåå59xäÈ‘ˆˆMEÀ. ïEB¬5ÞQ]VÀ?azÄœ9sþò—¿Œ9òÿý¿ÿû»ßý®K—. 
صk—é®={öÜÿýÆãººº5kÖ!Z·nýÞ{ï™îB­‹èH}}}EE…Ëå²èõ/_¾l,é¨Äꢄںêêê"##›ŒŠŠºxñ¢¦"`d¼CE4,ÞÝ õbÔxGuY‚·'p½‰‰‰9|ø°ñøë¯¿¾õÖ[¥”¯¾újÿþýMwµhÑ¢ªªÊxüý÷ßGEEI)+++Mw¡ÖePWWW^^~éÒ%‹^_JÙÐÐÐÐÐ`Ýë_¼ºÔ,épo…礥¥ 2¤ñƒsrr}Új×¥K—>\^^nµH"ÖÅæä9*ëR¶¤Ã½žãr¹fÏž羺D\\\ff¦ËåÒTì’ˆñ.A‹BwÈ™ 5Þ!{1j¼£º¬€0ñØœX]lNž£².eK:Ü[A|°x7À+ 5Þ!g*Ôx‡ìÅ„pöˆ¼¼¼Î; !:tèpâĉ•+WvíÚ5--­¸¸Øt—Êk9@ÖÅæä9Àý r‘JâÖEnÈx‡,JâÆ;g*S€ŒwÈ¢ˆ qH)¹Qî»ï¾°°°'Ÿ|rñâÅßÿ½bĈ«W¯Ž‰‰Y»v­¹®3f¬_¿>##£gÏžaaaUUUû÷ïŸ?þèÑ£çÍ›g® µ.Åh|ðk¢²®ŠŠŠÖ­[7kÖÌ:…›ÊÊJ£¨6mÚX*RY”PX—¢¾¾¾¶¶644ÔÏÏϊ׿|ù²BÙÎÀꢄں ã²(¯Ù¶P{–m[H=K°mÙoOàzÓ¦M›ÊÊJ)eii©âÌ™3RÊ3g΄‡‡›îRy-Ôº$â9¼º”-épo…)¼ñÆ]»vu8Bˆ-ZÜyç{öì1Ý2tèÐÙ³g79øå—_¾øâ‹¦»¤ª¢¤Úº ã²(7xñn ².¤ž%AÛdÏ’l[v…°G´jÕê‡~RÖÕÕ !jjj¤”555ÁÁÁ¦»T^˲.6'ÏQY×½÷ÞûðÃggg?øàƒ©©©©©©‹/îׯßÈ‘#Í©¼O†²¢¤Úº233cccgÍš5kÖ¬ØØØiÓ¦Mž<ÙétîܹÓ\QtttNNŽñøâÅ‹«W¯–RæååÝrË-护¢¤Úº ã²(‰ï3j¼CöbÔxG­Ë 8{DÏž=—.]*¥\¼xqûöíŸ~úé¼¼¼§Ÿ~úöÛo7Ý¥òZu±9yŽÊº”-épo…çÄÄÄ>|Øxüõ×_ßzë­RÊW_}µÿþæŠZ´hQUUe<þþû¢¤”•••护¢¤Úº ã²(‰ï3j¼CöbÔxG­Ë 8{Ć üüüœNgXXØÑ£Gûôé#„ˆŒŒÜ»w¯é.•×r€¬‹ÍÉsTÖ¥lI‡{+<Ç]—”òâÅ‹!!!RÊS§N™Þcbb3A­‹ÜñY”ÔxWYdÏ ñY±#^¿õ¦}ûöÎkb®îæ›oþî»ï¤”;w–R·oßÞ\…j]êÜø'•Ô¥rI‡{+<ç±Ç[»v­E/îª 2Þ!‹òm ¬gIжÙ³è²-VýÑ(,,T©+--w?mӦͅ ¬AÖ5nܸwÞyçj_}øá‡¯±€pÔÔÔŒ;vÆ B)åÈ‘#ÿú׿›¯Ìµ®/¾øbÏž=Wûê'Ÿ|b¢ëÅ_üá‡JJJÜë9Ï?ÿ¼ë9*‹ ëBž;wΊWö–ÕïE ÜxWYdÏ m ²gÑe[¸ZÚ·oðàÁÖ­['%%}óÍ7³fÍ:xðà‡~èíóò5u¡î‘C­+!!¡¼¼üßPSSc–+11ñ³Ï>‹OJJÊËË+))éÓ§ÿ¤VY”PX!W²m)+ 5ÞUÖÙ³hÛbÏ"Jñêú3ù73fŒ±Ù mÛ¶ÁÁÁ?ûÙÏ ¼}R& ¦.Ô=r¨u©¤U«VÆ£¨~ø¡M›6^=#sPY×… ,zå«QQQQPPð㜘ˆú¢¤’ºTÙ¶”…ï*ë‚ìY´ma÷,ɶe38kƒËå2.»·uëÖ¸\.oŸ‘9@ÖÙœ$h]íÚµ«¬¬”RvîÜÙår=ÿüó¿ùÍo¼}R& ².!ÄôéÓ›LOOŸ9s¦¹"—Ë5{ö츸8÷ßpãââ233­È eEIµu©2Þ!‹’ ñY”m[=K²mÙÀ„˜ds’ uA.RIµu !Úµk—™™ÙøàöíÛ“““Íedd$$$deeeggçäädgg/Z´(>>~ÆŒ护¢¤Úº¹"ñY”m[=K²mÙÀúQYYùÔSOyû,Ì©.Èæ$AëB]ÏQY—"777>>~ñâÅîƒÅÅÅ-[¶4Wž——×äà‘#G"""ÌI…EIµuy¤xwVd¼C%AÛdÏ’l[v…°~…‡‡{û,Ì©.Èæ$që""„¨««ËÏÏŽŽ^²d‰qpß¾}±±±æŠ‚‚‚~üA£ÒÒÒ   sERaQRm]^)ÞÝ€ïEÏAwÔº¬ ™5—Ö"ħiÞ¼¹ŸŸŸâ®»îJIIiÞ¼¹·ÏÈPë28wîÜï~÷;oŸ…ù(««S§NÛ·oŸ5kÖ¨Q£-Zôè£2Ä\ůýëôôôüü|÷‘ÜÜܱcÇ4È\‘E oÔEH 㲨Æ@¶-¤ž%ضì `m(ùeeeRJãqEE…·ÏËSPë2€lN´®ÚÚÚ>øÀÛga> ê p8Bˆäää}ûö¹\®… öêÕëå—_6W´|ùòêêꤤ¤V­ZÅÆÆ¶jÕªk×®µµµË–-3W$%ÔÖ¥Èx‡,ª1ñY”m[H=K°mÙÞXŒ¿b444ÈLNN>xð ÷NÊPë28}útJJʵo£§#uA%ëÊËË;tèÐùóçCBBºwïž””äí32¼º 
㲨Æà%†-J€ÖY^¼è[—ßO ±.—Ëx ¨u]())1¸×s„-Z´÷êyyŠúº***Z·nݬ™å‹’’’Ú¶mktÜ6mÚXêRV”P[— ã²(¢m ¸g ¶-ûÁ-Є˜ê9ȺâþEïÞ½+++Ç ðöyyŠÊºòóó“’’""":vìøí·ßþçþg·n݆êþY444dffÆÇLJ‡‡wèÐ!<<<>>~Μ9 抄¢„Úº¹"ñY”m[=K°mÙnÖÔ¿:#Õ…ºGµ.¤ßÀÆ(¨ë¾ûî {òÉ'/^üý÷ß !FŒ±zõꘘ˜µkך(š1cÆúõë322zöìVUUµÿþùóç=zÞ¼y&Š„Â¢„Úº¼ä› ¬(Èx‡,ª1`¿„H=K°mÙo\zšxDuuõ_þòoŸ…ù@ÖvŸ 7uA%•ÔÕ¦M›ÊÊJ)eii©âÌ™3RÊ3gΘîUyãAeEIýo¨ø“@Æ;dQ4 !‹’ u!õ,ɶeW¸Z?BBBž{î9oŸ…ù ÖE<ÿüç?N§"88Xjü÷Ÿÿü§¹¢ºººÈÈÈ&£¢¢.^¼h®H(,J¨­Ë+@Æ;dQ„ø¨ñŽZ—pÖÊÊÊÂÂÂÊÊJoŸˆÉ ÖEt!$$dúôéÞ> óQPW§NÞyç!ÄŠ+Ú·oÿüóÏççç?ÿüó=zô0W¤òƃʊúßPñ'ŒwÈ¢ˆ^@¶-¤ž%ضl‹·— Éõâr¹fÏžçþ—™™ér¹¼}jZ—ê9ԺȱaÃ???§ÓvôèÑ>}ú!"##÷îÝk®¨¤¤$55Uát:cbbŒ?u§¦¦–””˜+’ ‹’jëR d¼CÕÈx‡,ŠÜ0¨ñŽZ—ð"XÚ ýÇͯj]D;*++u¼”ÿO¢¦®ãÇ:t¨oß¾111RÊ3gÎDGG7¾‰(»ñ Ê¢„Î7T¼ñYÑȶ…׳Û– ñêøMþ tÿ¸ùÕ@­Ë ¢¢¢   ¢¢ÂÛ'b2Hu¡®ç Ö%¥üî»ïvîÜyúôioŸˆÉàÕïE5)ÞÝ€ïE¹Á‹w}ë⬠AAAÿ_{wšeÝÇüÞ“I”³HKj•S„èe‘›uPô"d½A2hÅ “: "B!;Ij„$ØÑEDPAdÑ[ÛZ­½ẞƒöøôàžzv=÷åÿ÷ÿ|ŽÆ}Ëöûâø^ûÝ×u_÷v÷èèèµ×^[ÉùÄO\úÂó¹sç}ôÑz¿§+d®¨çsBæºãŽ;&&&Š¢hkk»xñâ[o½õØcU=T ¢æ*"Ö{4TÈzªZï!Cqë=õ\à” .]ºôÖ[oݹsçüQõ8¥‰—+jGÍUÅÀÀ@__ßÇÜ××700Põ8¥ –ë¹çžûâ‹/Š¢¸ùæ››››ï¹çžï¾û®ê¡J5W±Þ‹ ¡BÖ{ÈPó‚Õ{]¼PQë=õ\.NÉÐÐPGGÇéÓ§_yå•©©©={ö¬]»¶ê¡J5W¼käêâåšýòË/ׯ_¿fÍšªg)S¼\sssEQ,Y²¤¿¿ÕªUwß}÷UW]UõP%ˆš«´ÞC†ª‹Wïµ ¡âÕ{-h¨¨õž|®J×oþžÁÁÁ+VÔ¿þì³Ïê÷‹ðòsÈ\!¯‘+‚æ y>§ˆ›‹„„¬÷¡Š õ2T´ÞC†âÊôªpþª‘‘‘±±±¢(FFFFFF6lØðÕW_íß¿¿³³³êÑ%j®‰‰‰^xáصkWè:‹¨¹š››?ÞßßßÙÙyáÂ…ªÇ)MÔ\?ÿüók¯½Võå –+d½‡ U²ÞC†ª Yï!CÕ«÷y©æªn÷æï‰úŸ5WýÁÐÐÐã?þÐC}ûí·UOT޹¢žÏ‰š«ø÷h‘˲ÞC†ª \ïÁBAë=d¨yÁê}^¢¹–,¼så8yòäèèèæÍ›:Tõ,eŠš«®¥¥åÀŸþyWWWOOÏ«¯¾ÚÔÔTõP%ˆ”ëÒó9µZ­~>§»»{ß¾}IÿNFÍEBBÖ{ÈP—ŠTïó‚… Yï!CqÅr¬”LNN¾ÿþûï¼óNÕƒ”,^®‘‘‘ááá7ž={¶þÈøøxww÷ÜÜ\Ò=2×%]!sÕÿ0ªÕj—þ^}õÕ+V¬¨t®ÅŠš«±ÞkAC…¬÷¡jAë=d¨ZÜzO=—ʵÇCæ:uêÔåÎçtttT2R)BæZ²ä_W-ÍÍÍÕï9y×]w:uªº¡J5 Yï!CÕ‚Ö{ÈPµ¸õžz.—@§aÛ¶mŸ|òÉåžÝ²eËÞ½{9OY¢æŠz\È\“““Û·oOúûŸBæºxñbý‹úçÐŒW;OYBæ Yï!CÕ…¬÷¡jAë=d¨ZÐz¯¥ŸËœ†Ã‡;vìrÏ'j.àÿ!d½‡ U Zï!Cq%³CÉ¢öxÔ\¤hbbâ×_]¾|ù7ÞXõ,eŠš‹+\ÈzŠtE­÷DsY€HÃÜÜܻᄏ{÷î¡¡¡ú#·ÝvÛË/¿ÜÛÛ[ÿ †DE͹¨õžz.ï ½½½ûöíëíí½ï¾ûn¸á†_~ùåĉï½÷ÞäääŽ;ªžî5@æ¢Ö{깜 +W®ú¨êÑ%j.€ÌE­÷Ôsy0)9þüéӧ럻pï½÷¶··W=Q9¢æÈ\ÔzO7—»@ŒÙÙÙ'N¬_¿~Íš5UÏR¦¨¹2µÞ“ÎuÕÛo¿]õ ð—üøã]]]}}}×\sÍý÷ßßÔÔTõD刚 sQë=é\Þ @Jš››?ÞßßßÙÙyáÂ…ªÇ)MÔ\™‹Zïéæ²˜–––¼øâ‹]]]»ví 
s3‹¨¹2µÞÍå&X¤adddxxxãÆgÏž­?2>>ÞÝÝ=77wèСjg[Œ¨¹2µÞSÏe ¼Å(écYÔ\™‹Zï©çrhÒpòäÉÑÑÑÍ›7'ñó_5@æ¢Ö{ê¹,À¤¡££crrrûöíUÏR¦¨¹2µÞSÏåh²à.Ð$`Û¶m <»eË–†MR®¨¹2µÞär 4 8|øð±cÇ.÷ìÁƒ9L‰¢æÈ\ÔzË%Ð$ µµu|||0==ݰaJ5@æ¢Ö{€\`²à=ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @,ÀdÁ @þ ºBuûIEND®B`‚sleef-3.3.1/doc/html/tutorial.c000066400000000000000000000031301333715643700163450ustar00rootroot00000000000000// gcc tutorial.c -lsleef -lsleefdft -lm #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #define THRES 1e-4 typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PI * _Complex_I / n) * kn); } void forward(cmpl *ts, cmpl *fs, int len) { for(int k=0;k THRES) || (fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) { success = 0; } } printf("%s\n", success ? "OK" : "NG"); free(fs); free(ts); Sleef_free(sy); Sleef_free(sx); SleefDFT_dispose(p); exit(success); } sleef-3.3.1/doc/html/vectest.c000066400000000000000000000005131333715643700161610ustar00rootroot00000000000000#include #include #include #include #define N 256 __attribute__ ((__aligned__(16))) double a[N]; int main(int argc, char **argv) { srand(time(NULL)); for(int i = 0;i < N;i++) a[i] = rand(); for(int i = 0;i < N;i++) a[i] = sin(a[i]); for(int i = 0;i < N;i++) printf("%g\n", a[i]); } sleef-3.3.1/doc/html/x86.xhtml000066400000000000000000007363411333715643700160620ustar00rootroot00000000000000 SLEEF Documentation

SLEEF Documentation - Math library reference

Table of contents

Function naming convention

The naming convention for the vectorized math functions is shown in Fig. 3.1. The function name is a concatenation of the following items, in this order.


  • String "Sleef_".
  • Name of the corresponding double precision function in math.h.
  • Data type specifier of a vector element, "d" and "f" for double and single precision functions, respectively.
  • The number of elements in a vector.
  • Accuracy specifier, a concatenation of the string "_u" and 10 times the maximum error for the typical input domain in ULP (two digits). For example, "_u10" denotes a 1.0 ULP error bound and "_u35" denotes a 3.5 ULP error bound. This field is omitted from the name if the function is expected to always return the correctly rounded value.
  • Vector extension specifier.
    • (Nothing) : Dispatcher automatically chooses the fastest available vector extension
    • sse2 : SSE2
    • sse4 : SSE4.1
    • avx2128 : AVX2+FMA3 instructions utilized for 128-bit computation
    • avx : AVX
    • fma4 : AMD FMA4
    • avx2 : AVX2+FMA3
    • avx512f : AVX512F

naming convention
Fig. 3.1: Naming convention of vectorized functions

Data types for x86 architecture

Sleef___m128_2

Description

Sleef___m128_2 is a data type for storing two __m128 values, which is defined in sleef.h as follows:

typedef struct {
  __m128 x, y;
} Sleef___m128_2;

Sleef___m128d_2

Description

Sleef___m128d_2 is a data type for storing two __m128d values, which is defined in sleef.h as follows:

typedef struct {
  __m128d x, y;
} Sleef___m128d_2;

Sleef___m256_2

Description

Sleef___m256_2 is a data type for storing two __m256 values, which is defined in sleef.h as follows:

typedef struct {
  __m256 x, y;
} Sleef___m256_2;

Sleef___m256d_2

Description

Sleef___m256d_2 is a data type for storing two __m256d values, which is defined in sleef.h as follows:

typedef struct {
  __m256d x, y;
} Sleef___m256d_2;

Sleef___m512_2

Description

Sleef___m512_2 is a data type for storing two __m512 values, which is defined in sleef.h as follows:

typedef struct {
  __m512 x, y;
} Sleef___m512_2;

Sleef___m512d_2

Description

Sleef___m512d_2 is a data type for storing two __m512d values, which is defined in sleef.h as follows:

typedef struct {
  __m512d x, y;
} Sleef___m512d_2;

Trigonometric Functions

Vectorized double precision sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_sind2_u10(__m128d a);
__m128d Sleef_sind2_u10sse2(__m128d a);
__m128d Sleef_sind2_u10sse4(__m128d a);
__m128d Sleef_sind2_u10avx2128(__m128d a);

__m256d Sleef_sind4_u10(__m256d a);
__m256d Sleef_sind4_u10avx(__m256d a);
__m256d Sleef_sind4_u10fma4(__m256d a);
__m256d Sleef_sind4_u10avx2(__m256d a);

__m512d Sleef_sind8_u10(__m512d a);
__m512d Sleef_sind8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sin_u10 with the same accuracy specification.


Vectorized single precision sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_sinf4_u10(__m128 a);
__m128 Sleef_sinf4_u10sse2(__m128 a);
__m128 Sleef_sinf4_u10sse4(__m128 a);
__m128 Sleef_sinf4_u10avx2128(__m128 a);

__m256 Sleef_sinf8_u10(__m256 a);
__m256 Sleef_sinf8_u10avx(__m256 a);
__m256 Sleef_sinf8_u10fma4(__m256 a);
__m256 Sleef_sinf8_u10avx2(__m256 a);

__m512 Sleef_sinf16_u10(__m512 a);
__m512 Sleef_sinf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinf_u10 with the same accuracy specification.


Vectorized double precision sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_sind2_u35(__m128d a);
__m128d Sleef_sind2_u35sse2(__m128d a);
__m128d Sleef_sind2_u35sse4(__m128d a);
__m128d Sleef_sind2_u35avx2128(__m128d a);

__m256d Sleef_sind4_u35(__m256d a);
__m256d Sleef_sind4_u35avx(__m256d a);
__m256d Sleef_sind4_u35fma4(__m256d a);
__m256d Sleef_sind4_u35avx2(__m256d a);

__m512d Sleef_sind8_u35(__m512d a);
__m512d Sleef_sind8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sin_u35 with the same accuracy specification.


Vectorized single precision sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_sinf4_u35(__m128 a);
__m128 Sleef_sinf4_u35sse2(__m128 a);
__m128 Sleef_sinf4_u35sse4(__m128 a);
__m128 Sleef_sinf4_u35avx2128(__m128 a);

__m256 Sleef_sinf8_u35(__m256 a);
__m256 Sleef_sinf8_u35avx(__m256 a);
__m256 Sleef_sinf8_u35fma4(__m256 a);
__m256 Sleef_sinf8_u35avx2(__m256 a);

__m512 Sleef_sinf16_u35(__m512 a);
__m512 Sleef_sinf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinf_u35 with the same accuracy specification.


Vectorized double precision cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_cosd2_u10(__m128d a);
__m128d Sleef_cosd2_u10sse2(__m128d a);
__m128d Sleef_cosd2_u10sse4(__m128d a);
__m128d Sleef_cosd2_u10avx2128(__m128d a);

__m256d Sleef_cosd4_u10(__m256d a);
__m256d Sleef_cosd4_u10avx(__m256d a);
__m256d Sleef_cosd4_u10fma4(__m256d a);
__m256d Sleef_cosd4_u10avx2(__m256d a);

__m512d Sleef_cosd8_u10(__m512d a);
__m512d Sleef_cosd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cos_u10 with the same accuracy specification.


Vectorized single precision cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_cosf4_u10(__m128 a);
__m128 Sleef_cosf4_u10sse2(__m128 a);
__m128 Sleef_cosf4_u10sse4(__m128 a);
__m128 Sleef_cosf4_u10avx2128(__m128 a);

__m256 Sleef_cosf8_u10(__m256 a);
__m256 Sleef_cosf8_u10avx(__m256 a);
__m256 Sleef_cosf8_u10fma4(__m256 a);
__m256 Sleef_cosf8_u10avx2(__m256 a);

__m512 Sleef_cosf16_u10(__m512 a);
__m512 Sleef_cosf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cosf_u10 with the same accuracy specification.


Vectorized double precision cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_cosd2_u35(__m128d a);
__m128d Sleef_cosd2_u35sse2(__m128d a);
__m128d Sleef_cosd2_u35sse4(__m128d a);
__m128d Sleef_cosd2_u35avx2128(__m128d a);

__m256d Sleef_cosd4_u35(__m256d a);
__m256d Sleef_cosd4_u35avx(__m256d a);
__m256d Sleef_cosd4_u35fma4(__m256d a);
__m256d Sleef_cosd4_u35avx2(__m256d a);

__m512d Sleef_cosd8_u35(__m512d a);
__m512d Sleef_cosd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cos_u35 with the same accuracy specification.


Vectorized single precision cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_cosf4_u35(__m128 a);
__m128 Sleef_cosf4_u35sse2(__m128 a);
__m128 Sleef_cosf4_u35sse4(__m128 a);
__m128 Sleef_cosf4_u35avx2128(__m128 a);

__m256 Sleef_cosf8_u35(__m256 a);
__m256 Sleef_cosf8_u35avx(__m256 a);
__m256 Sleef_cosf8_u35fma4(__m256 a);
__m256 Sleef_cosf8_u35avx2(__m256 a);

__m512 Sleef_cosf16_u35(__m512 a);
__m512 Sleef_cosf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cosf_u35 with the same accuracy specification.


Vectorized double precision combined sine and cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef___m128d_2 Sleef_sincosd2_u10(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u10sse2(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u10sse4(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u10avx2128(__m128d a);

Sleef___m256d_2 Sleef_sincosd4_u10(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u10avx(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u10fma4(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u10avx2(__m256d a);

Sleef___m512d_2 Sleef_sincosd8_u10(__m512d a);
Sleef___m512d_2 Sleef_sincosd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincos_u10 with the same accuracy specification.


Vectorized single precision combined sine and cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

Sleef___m128_2 Sleef_sincosf4_u10(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u10sse2(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u10sse4(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u10avx2128(__m128 a);

Sleef___m256_2 Sleef_sincosf8_u10(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u10avx(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u10fma4(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u10avx2(__m256 a);

Sleef___m512_2 Sleef_sincosf16_u10(__m512 a);
Sleef___m512_2 Sleef_sincosf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincosf_u10 with the same accuracy specification.


Vectorized double precision combined sine and cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef___m128d_2 Sleef_sincosd2_u35(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u35sse2(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u35sse4(__m128d a);
Sleef___m128d_2 Sleef_sincosd2_u35avx2128(__m128d a);

Sleef___m256d_2 Sleef_sincosd4_u35(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u35avx(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u35fma4(__m256d a);
Sleef___m256d_2 Sleef_sincosd4_u35avx2(__m256d a);

Sleef___m512d_2 Sleef_sincosd8_u35(__m512d a);
Sleef___m512d_2 Sleef_sincosd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincos_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef___m128_2 Sleef_sincosf4_u35(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u35sse2(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u35sse4(__m128 a);
Sleef___m128_2 Sleef_sincosf4_u35avx2128(__m128 a);

Sleef___m256_2 Sleef_sincosf8_u35(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u35avx(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u35fma4(__m256 a);
Sleef___m256_2 Sleef_sincosf8_u35avx2(__m256 a);

Sleef___m512_2 Sleef_sincosf16_u35(__m512 a);
Sleef___m512_2 Sleef_sincosf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincosf_u35 with the same accuracy specification.


Vectorized double precision sine functions with 0.506 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_sinpid2_u05(__m128d a);
__m128d Sleef_sinpid2_u05sse2(__m128d a);
__m128d Sleef_sinpid2_u05sse4(__m128d a);
__m128d Sleef_sinpid2_u05avx2128(__m128d a);

__m256d Sleef_sinpid4_u05(__m256d a);
__m256d Sleef_sinpid4_u05avx(__m256d a);
__m256d Sleef_sinpid4_u05fma4(__m256d a);
__m256d Sleef_sinpid4_u05avx2(__m256d a);

__m512d Sleef_sinpid8_u05(__m512d a);
__m512d Sleef_sinpid8_u05avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinpi_u05 with the same accuracy specification.


Vectorized single precision sine functions with 0.506 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_sinpif4_u05(__m128 a);
__m128 Sleef_sinpif4_u05sse2(__m128 a);
__m128 Sleef_sinpif4_u05sse4(__m128 a);
__m128 Sleef_sinpif4_u05avx2128(__m128 a);

__m256 Sleef_sinpif8_u05(__m256 a);
__m256 Sleef_sinpif8_u05avx(__m256 a);
__m256 Sleef_sinpif8_u05fma4(__m256 a);
__m256 Sleef_sinpif8_u05avx2(__m256 a);

__m512 Sleef_sinpif16_u05(__m512 a);
__m512 Sleef_sinpif16_u05avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinpif_u05 with the same accuracy specification.


Vectorized double precision cosine functions with 0.506 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_cospid2_u05(__m128d a);
__m128d Sleef_cospid2_u05sse2(__m128d a);
__m128d Sleef_cospid2_u05sse4(__m128d a);
__m128d Sleef_cospid2_u05avx2128(__m128d a);

__m256d Sleef_cospid4_u05(__m256d a);
__m256d Sleef_cospid4_u05avx(__m256d a);
__m256d Sleef_cospid4_u05fma4(__m256d a);
__m256d Sleef_cospid4_u05avx2(__m256d a);

__m512d Sleef_cospid8_u05(__m512d a);
__m512d Sleef_cospid8_u05avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cospi_u05 with the same accuracy specification.


Vectorized single precision cosine functions with 0.506 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_cospif4_u05(__m128 a);
__m128 Sleef_cospif4_u05sse2(__m128 a);
__m128 Sleef_cospif4_u05sse4(__m128 a);
__m128 Sleef_cospif4_u05avx2128(__m128 a);

__m256 Sleef_cospif8_u05(__m256 a);
__m256 Sleef_cospif8_u05avx(__m256 a);
__m256 Sleef_cospif8_u05fma4(__m256 a);
__m256 Sleef_cospif8_u05avx2(__m256 a);

__m512 Sleef_cospif16_u05(__m512 a);
__m512 Sleef_cospif16_u05avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine functions with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef___m128d_2 Sleef_sincospid2_u05(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u05sse2(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u05sse4(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u05avx2128(__m128d a);

Sleef___m256d_2 Sleef_sincospid4_u05(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u05avx(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u05fma4(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u05avx2(__m256d a);

Sleef___m512d_2 Sleef_sincospid8_u05(__m512d a);
Sleef___m512d_2 Sleef_sincospid8_u05avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincospi_u05 with the same accuracy specification.


Vectorized single precision combined sine and cosine functions with 0.506 ULP error bound

Synopsis

#include <sleef.h>

Sleef___m128_2 Sleef_sincospif4_u05(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u05sse2(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u05sse4(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u05avx2128(__m128 a);

Sleef___m256_2 Sleef_sincospif8_u05(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u05avx(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u05fma4(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u05avx2(__m256 a);

Sleef___m512_2 Sleef_sincospif16_u05(__m512 a);
Sleef___m512_2 Sleef_sincospif16_u05avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincospif_u05 with the same accuracy specification.


Vectorized double precision combined sine and cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef___m128d_2 Sleef_sincospid2_u35(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u35sse2(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u35sse4(__m128d a);
Sleef___m128d_2 Sleef_sincospid2_u35avx2128(__m128d a);

Sleef___m256d_2 Sleef_sincospid4_u35(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u35avx(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u35fma4(__m256d a);
Sleef___m256d_2 Sleef_sincospid4_u35avx2(__m256d a);

Sleef___m512d_2 Sleef_sincospid8_u35(__m512d a);
Sleef___m512d_2 Sleef_sincospid8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincospi_u35 with the same accuracy specification.


Vectorized single precision combined sine and cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

Sleef___m128_2 Sleef_sincospif4_u35(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u35sse2(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u35sse4(__m128 a);
Sleef___m128_2 Sleef_sincospif4_u35avx2128(__m128 a);

Sleef___m256_2 Sleef_sincospif8_u35(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u35avx(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u35fma4(__m256 a);
Sleef___m256_2 Sleef_sincospif8_u35avx2(__m256 a);

Sleef___m512_2 Sleef_sincospif16_u35(__m512 a);
Sleef___m512_2 Sleef_sincospif16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sincospif_u35 with the same accuracy specification.


Vectorized double precision tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_tand2_u10(__m128d a);
__m128d Sleef_tand2_u10sse2(__m128d a);
__m128d Sleef_tand2_u10sse4(__m128d a);
__m128d Sleef_tand2_u10avx2128(__m128d a);

__m256d Sleef_tand4_u10(__m256d a);
__m256d Sleef_tand4_u10avx(__m256d a);
__m256d Sleef_tand4_u10fma4(__m256d a);
__m256d Sleef_tand4_u10avx2(__m256d a);

__m512d Sleef_tand8_u10(__m512d a);
__m512d Sleef_tand8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tan_u10 with the same accuracy specification.


Vectorized single precision tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_tanf4_u10(__m128 a);
__m128 Sleef_tanf4_u10sse2(__m128 a);
__m128 Sleef_tanf4_u10sse4(__m128 a);
__m128 Sleef_tanf4_u10avx2128(__m128 a);

__m256 Sleef_tanf8_u10(__m256 a);
__m256 Sleef_tanf8_u10avx(__m256 a);
__m256 Sleef_tanf8_u10fma4(__m256 a);
__m256 Sleef_tanf8_u10avx2(__m256 a);

__m512 Sleef_tanf16_u10(__m512 a);
__m512 Sleef_tanf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanf_u10 with the same accuracy specification.


Vectorized double precision tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_tand2_u35(__m128d a);
__m128d Sleef_tand2_u35sse2(__m128d a);
__m128d Sleef_tand2_u35sse4(__m128d a);
__m128d Sleef_tand2_u35avx2128(__m128d a);

__m256d Sleef_tand4_u35(__m256d a);
__m256d Sleef_tand4_u35avx(__m256d a);
__m256d Sleef_tand4_u35fma4(__m256d a);
__m256d Sleef_tand4_u35avx2(__m256d a);

__m512d Sleef_tand8_u35(__m512d a);
__m512d Sleef_tand8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tan_u35 with the same accuracy specification.


Vectorized single precision tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_tanf4_u35(__m128 a);
__m128 Sleef_tanf4_u35sse2(__m128 a);
__m128 Sleef_tanf4_u35sse4(__m128 a);
__m128 Sleef_tanf4_u35avx2128(__m128 a);

__m256 Sleef_tanf8_u35(__m256 a);
__m256 Sleef_tanf8_u35avx(__m256 a);
__m256 Sleef_tanf8_u35fma4(__m256 a);
__m256 Sleef_tanf8_u35avx2(__m256 a);

__m512 Sleef_tanf16_u35(__m512 a);
__m512 Sleef_tanf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanf_u35 with the same accuracy specification.

Power, exponential, and logarithmic functions

Vectorized double precision power functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_powd2_u10(__m128d a, __m128d b);
__m128d Sleef_powd2_u10sse2(__m128d a, __m128d b);
__m128d Sleef_powd2_u10sse4(__m128d a, __m128d b);
__m128d Sleef_powd2_u10avx2128(__m128d a, __m128d b);

__m256d Sleef_powd4_u10(__m256d a, __m256d b);
__m256d Sleef_powd4_u10avx(__m256d a, __m256d b);
__m256d Sleef_powd4_u10fma4(__m256d a, __m256d b);
__m256d Sleef_powd4_u10avx2(__m256d a, __m256d b);

__m512d Sleef_powd8_u10(__m512d a, __m512d b);
__m512d Sleef_powd8_u10avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_pow_u10 with the same accuracy specification.


Vectorized single precision power functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_powf4_u10(__m128 a, __m128 b);
__m128 Sleef_powf4_u10sse2(__m128 a, __m128 b);
__m128 Sleef_powf4_u10sse4(__m128 a, __m128 b);
__m128 Sleef_powf4_u10avx2128(__m128 a, __m128 b);

__m256 Sleef_powf8_u10(__m256 a, __m256 b);
__m256 Sleef_powf8_u10avx(__m256 a, __m256 b);
__m256 Sleef_powf8_u10fma4(__m256 a, __m256 b);
__m256 Sleef_powf8_u10avx2(__m256 a, __m256 b);

__m512 Sleef_powf16_u10(__m512 a, __m512 b);
__m512 Sleef_powf16_u10avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_powf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_logd2_u10(__m128d a);
__m128d Sleef_logd2_u10sse2(__m128d a);
__m128d Sleef_logd2_u10sse4(__m128d a);
__m128d Sleef_logd2_u10avx2128(__m128d a);

__m256d Sleef_logd4_u10(__m256d a);
__m256d Sleef_logd4_u10avx(__m256d a);
__m256d Sleef_logd4_u10fma4(__m256d a);
__m256d Sleef_logd4_u10avx2(__m256d a);

__m512d Sleef_logd8_u10(__m512d a);
__m512d Sleef_logd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log_u10 with the same accuracy specification.


Vectorized single precision natural logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_logf4_u10(__m128 a);
__m128 Sleef_logf4_u10sse2(__m128 a);
__m128 Sleef_logf4_u10sse4(__m128 a);
__m128 Sleef_logf4_u10avx2128(__m128 a);

__m256 Sleef_logf8_u10(__m256 a);
__m256 Sleef_logf8_u10avx(__m256 a);
__m256 Sleef_logf8_u10fma4(__m256 a);
__m256 Sleef_logf8_u10avx2(__m256 a);

__m512 Sleef_logf16_u10(__m512 a);
__m512 Sleef_logf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_logf_u10 with the same accuracy specification.


Vectorized double precision natural logarithmic functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_logd2_u35(__m128d a);
__m128d Sleef_logd2_u35sse2(__m128d a);
__m128d Sleef_logd2_u35sse4(__m128d a);
__m128d Sleef_logd2_u35avx2128(__m128d a);

__m256d Sleef_logd4_u35(__m256d a);
__m256d Sleef_logd4_u35avx(__m256d a);
__m256d Sleef_logd4_u35fma4(__m256d a);
__m256d Sleef_logd4_u35avx2(__m256d a);

__m512d Sleef_logd8_u35(__m512d a);
__m512d Sleef_logd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log_u35 with the same accuracy specification.


Vectorized single precision natural logarithmic functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_logf4_u35(__m128 a);
__m128 Sleef_logf4_u35sse2(__m128 a);
__m128 Sleef_logf4_u35sse4(__m128 a);
__m128 Sleef_logf4_u35avx2128(__m128 a);

__m256 Sleef_logf8_u35(__m256 a);
__m256 Sleef_logf8_u35avx(__m256 a);
__m256 Sleef_logf8_u35fma4(__m256 a);
__m256 Sleef_logf8_u35avx2(__m256 a);

__m512 Sleef_logf16_u35(__m512 a);
__m512 Sleef_logf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_logf_u35 with the same accuracy specification.


Vectorized double precision base-10 logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_log10d2_u10(__m128d a);
__m128d Sleef_log10d2_u10sse2(__m128d a);
__m128d Sleef_log10d2_u10sse4(__m128d a);
__m128d Sleef_log10d2_u10avx2128(__m128d a);

__m256d Sleef_log10d4_u10(__m256d a);
__m256d Sleef_log10d4_u10avx(__m256d a);
__m256d Sleef_log10d4_u10fma4(__m256d a);
__m256d Sleef_log10d4_u10avx2(__m256d a);

__m512d Sleef_log10d8_u10(__m512d a);
__m512d Sleef_log10d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log10_u10 with the same accuracy specification.


Vectorized single precision base-10 logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_log10f4_u10(__m128 a);
__m128 Sleef_log10f4_u10sse2(__m128 a);
__m128 Sleef_log10f4_u10sse4(__m128 a);
__m128 Sleef_log10f4_u10avx2128(__m128 a);

__m256 Sleef_log10f8_u10(__m256 a);
__m256 Sleef_log10f8_u10avx(__m256 a);
__m256 Sleef_log10f8_u10fma4(__m256 a);
__m256 Sleef_log10f8_u10avx2(__m256 a);

__m512 Sleef_log10f16_u10(__m512 a);
__m512 Sleef_log10f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log10f_u10 with the same accuracy specification.


Vectorized double precision base-2 logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_log2d2_u10(__m128d a);
__m128d Sleef_log2d2_u10sse2(__m128d a);
__m128d Sleef_log2d2_u10sse4(__m128d a);
__m128d Sleef_log2d2_u10avx2128(__m128d a);

__m256d Sleef_log2d4_u10(__m256d a);
__m256d Sleef_log2d4_u10avx(__m256d a);
__m256d Sleef_log2d4_u10fma4(__m256d a);
__m256d Sleef_log2d4_u10avx2(__m256d a);

__m512d Sleef_log2d8_u10(__m512d a);
__m512d Sleef_log2d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log2_u10 with the same accuracy specification.


Vectorized single precision base-2 logarithmic functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_log2f4_u10(__m128 a);
__m128 Sleef_log2f4_u10sse2(__m128 a);
__m128 Sleef_log2f4_u10sse4(__m128 a);
__m128 Sleef_log2f4_u10avx2128(__m128 a);

__m256 Sleef_log2f8_u10(__m256 a);
__m256 Sleef_log2f8_u10avx(__m256 a);
__m256 Sleef_log2f8_u10fma4(__m256 a);
__m256 Sleef_log2f8_u10avx2(__m256 a);

__m512 Sleef_log2f16_u10(__m512 a);
__m512 Sleef_log2f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log2f_u10 with the same accuracy specification.


Vectorized double precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_log1pd2_u10(__m128d a);
__m128d Sleef_log1pd2_u10sse2(__m128d a);
__m128d Sleef_log1pd2_u10sse4(__m128d a);
__m128d Sleef_log1pd2_u10avx2128(__m128d a);

__m256d Sleef_log1pd4_u10(__m256d a);
__m256d Sleef_log1pd4_u10avx(__m256d a);
__m256d Sleef_log1pd4_u10fma4(__m256d a);
__m256d Sleef_log1pd4_u10avx2(__m256d a);

__m512d Sleef_log1pd8_u10(__m512d a);
__m512d Sleef_log1pd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log1p_u10 with the same accuracy specification.


Vectorized single precision logarithm of one plus argument with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_log1pf4_u10(__m128 a);
__m128 Sleef_log1pf4_u10sse2(__m128 a);
__m128 Sleef_log1pf4_u10sse4(__m128 a);
__m128 Sleef_log1pf4_u10avx2128(__m128 a);

__m256 Sleef_log1pf8_u10(__m256 a);
__m256 Sleef_log1pf8_u10avx(__m256 a);
__m256 Sleef_log1pf8_u10fma4(__m256 a);
__m256 Sleef_log1pf8_u10avx2(__m256 a);

__m512 Sleef_log1pf16_u10(__m512 a);
__m512 Sleef_log1pf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_log1pf_u10 with the same accuracy specification.


Vectorized double precision base-e exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_expd2_u10(__m128d a);
__m128d Sleef_expd2_u10sse2(__m128d a);
__m128d Sleef_expd2_u10sse4(__m128d a);
__m128d Sleef_expd2_u10avx2128(__m128d a);

__m256d Sleef_expd4_u10(__m256d a);
__m256d Sleef_expd4_u10avx(__m256d a);
__m256d Sleef_expd4_u10fma4(__m256d a);
__m256d Sleef_expd4_u10avx2(__m256d a);

__m512d Sleef_expd8_u10(__m512d a);
__m512d Sleef_expd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp_u10 with the same accuracy specification.


Vectorized single precision base-e exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_expf4_u10(__m128 a);
__m128 Sleef_expf4_u10sse2(__m128 a);
__m128 Sleef_expf4_u10sse4(__m128 a);
__m128 Sleef_expf4_u10avx2128(__m128 a);

__m256 Sleef_expf8_u10(__m256 a);
__m256 Sleef_expf8_u10avx(__m256 a);
__m256 Sleef_expf8_u10fma4(__m256 a);
__m256 Sleef_expf8_u10avx2(__m256 a);

__m512 Sleef_expf16_u10(__m512 a);
__m512 Sleef_expf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_expf_u10 with the same accuracy specification.


Vectorized double precision base-2 exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_exp2d2_u10(__m128d a);
__m128d Sleef_exp2d2_u10sse2(__m128d a);
__m128d Sleef_exp2d2_u10sse4(__m128d a);
__m128d Sleef_exp2d2_u10avx2128(__m128d a);

__m256d Sleef_exp2d4_u10(__m256d a);
__m256d Sleef_exp2d4_u10avx(__m256d a);
__m256d Sleef_exp2d4_u10fma4(__m256d a);
__m256d Sleef_exp2d4_u10avx2(__m256d a);

__m512d Sleef_exp2d8_u10(__m512d a);
__m512d Sleef_exp2d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp2_u10 with the same accuracy specification.


Vectorized single precision base-2 exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_exp2f4_u10(__m128 a);
__m128 Sleef_exp2f4_u10sse2(__m128 a);
__m128 Sleef_exp2f4_u10sse4(__m128 a);
__m128 Sleef_exp2f4_u10avx2128(__m128 a);

__m256 Sleef_exp2f8_u10(__m256 a);
__m256 Sleef_exp2f8_u10avx(__m256 a);
__m256 Sleef_exp2f8_u10fma4(__m256 a);
__m256 Sleef_exp2f8_u10avx2(__m256 a);

__m512 Sleef_exp2f16_u10(__m512 a);
__m512 Sleef_exp2f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp2f_u10 with the same accuracy specification.


Vectorized double precision base-10 exponential functions with 1.09 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_exp10d2_u10(__m128d a);
__m128d Sleef_exp10d2_u10sse2(__m128d a);
__m128d Sleef_exp10d2_u10sse4(__m128d a);
__m128d Sleef_exp10d2_u10avx2128(__m128d a);

__m256d Sleef_exp10d4_u10(__m256d a);
__m256d Sleef_exp10d4_u10avx(__m256d a);
__m256d Sleef_exp10d4_u10fma4(__m256d a);
__m256d Sleef_exp10d4_u10avx2(__m256d a);

__m512d Sleef_exp10d8_u10(__m512d a);
__m512d Sleef_exp10d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp10_u10 with the same accuracy specification.


Vectorized single precision base-10 exponential functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_exp10f4_u10(__m128 a);
__m128 Sleef_exp10f4_u10sse2(__m128 a);
__m128 Sleef_exp10f4_u10sse4(__m128 a);
__m128 Sleef_exp10f4_u10avx2128(__m128 a);

__m256 Sleef_exp10f8_u10(__m256 a);
__m256 Sleef_exp10f8_u10avx(__m256 a);
__m256 Sleef_exp10f8_u10fma4(__m256 a);
__m256 Sleef_exp10f8_u10avx2(__m256 a);

__m512 Sleef_exp10f16_u10(__m512 a);
__m512 Sleef_exp10f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_exp10f_u10 with the same accuracy specification.


Vectorized double precision base-e exponential functions minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_expm1d2_u10(__m128d a);
__m128d Sleef_expm1d2_u10sse2(__m128d a);
__m128d Sleef_expm1d2_u10sse4(__m128d a);
__m128d Sleef_expm1d2_u10avx2128(__m128d a);

__m256d Sleef_expm1d4_u10(__m256d a);
__m256d Sleef_expm1d4_u10avx(__m256d a);
__m256d Sleef_expm1d4_u10fma4(__m256d a);
__m256d Sleef_expm1d4_u10avx2(__m256d a);

__m512d Sleef_expm1d8_u10(__m512d a);
__m512d Sleef_expm1d8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_expm1_u10 with the same accuracy specification.


Vectorized single precision base-e exponential functions minus 1 with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_expm1f4_u10(__m128 a);
__m128 Sleef_expm1f4_u10sse2(__m128 a);
__m128 Sleef_expm1f4_u10sse4(__m128 a);
__m128 Sleef_expm1f4_u10avx2128(__m128 a);

__m256 Sleef_expm1f8_u10(__m256 a);
__m256 Sleef_expm1f8_u10avx(__m256 a);
__m256 Sleef_expm1f8_u10fma4(__m256 a);
__m256 Sleef_expm1f8_u10avx2(__m256 a);

__m512 Sleef_expm1f16_u10(__m512 a);
__m512 Sleef_expm1f16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_expm1f_u10 with the same accuracy specification.


Vectorized double precision square root functions with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_sqrtd2(__m128d a);
__m128d Sleef_sqrtd2_sse2(__m128d a);
__m128d Sleef_sqrtd2_sse4(__m128d a);
__m128d Sleef_sqrtd2_avx2128(__m128d a);

__m256d Sleef_sqrtd4(__m256d a);
__m256d Sleef_sqrtd4_avx(__m256d a);
__m256d Sleef_sqrtd4_fma4(__m256d a);
__m256d Sleef_sqrtd4_avx2(__m256d a);

__m512d Sleef_sqrtd8(__m512d a);
__m512d Sleef_sqrtd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sqrt_u05 with the same accuracy specification.


Vectorized single precision square root functions with 0.5001 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_sqrtf4(__m128 a);
__m128 Sleef_sqrtf4_sse2(__m128 a);
__m128 Sleef_sqrtf4_sse4(__m128 a);
__m128 Sleef_sqrtf4_avx2128(__m128 a);

__m256 Sleef_sqrtf8(__m256 a);
__m256 Sleef_sqrtf8_avx(__m256 a);
__m256 Sleef_sqrtf8_fma4(__m256 a);
__m256 Sleef_sqrtf8_avx2(__m256 a);

__m512 Sleef_sqrtf16(__m512 a);
__m512 Sleef_sqrtf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sqrtf_u05 with the same accuracy specification.


Vectorized double precision square root functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_sqrtd2_u35(__m128d a);
__m128d Sleef_sqrtd2_u35sse2(__m128d a);
__m128d Sleef_sqrtd2_u35sse4(__m128d a);
__m128d Sleef_sqrtd2_u35avx2128(__m128d a);

__m256d Sleef_sqrtd4_u35(__m256d a);
__m256d Sleef_sqrtd4_u35avx(__m256d a);
__m256d Sleef_sqrtd4_u35fma4(__m256d a);
__m256d Sleef_sqrtd4_u35avx2(__m256d a);

__m512d Sleef_sqrtd8_u35(__m512d a);
__m512d Sleef_sqrtd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sqrt_u35 with the same accuracy specification.


Vectorized single precision square root functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_sqrtf4_u35(__m128 a);
__m128 Sleef_sqrtf4_u35sse2(__m128 a);
__m128 Sleef_sqrtf4_u35sse4(__m128 a);
__m128 Sleef_sqrtf4_u35avx2128(__m128 a);

__m256 Sleef_sqrtf8_u35(__m256 a);
__m256 Sleef_sqrtf8_u35avx(__m256 a);
__m256 Sleef_sqrtf8_u35fma4(__m256 a);
__m256 Sleef_sqrtf8_u35avx2(__m256 a);

__m512 Sleef_sqrtf16_u35(__m512 a);
__m512 Sleef_sqrtf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sqrtf_u35 with the same accuracy specification.


Vectorized double precision cubic root functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_cbrtd2_u10(__m128d a);
__m128d Sleef_cbrtd2_u10sse2(__m128d a);
__m128d Sleef_cbrtd2_u10sse4(__m128d a);
__m128d Sleef_cbrtd2_u10avx2128(__m128d a);

__m256d Sleef_cbrtd4_u10(__m256d a);
__m256d Sleef_cbrtd4_u10avx(__m256d a);
__m256d Sleef_cbrtd4_u10fma4(__m256d a);
__m256d Sleef_cbrtd4_u10avx2(__m256d a);

__m512d Sleef_cbrtd8_u10(__m512d a);
__m512d Sleef_cbrtd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cbrt_u10 with the same accuracy specification.


Vectorized single precision cubic root functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_cbrtf4_u10(__m128 a);
__m128 Sleef_cbrtf4_u10sse2(__m128 a);
__m128 Sleef_cbrtf4_u10sse4(__m128 a);
__m128 Sleef_cbrtf4_u10avx2128(__m128 a);

__m256 Sleef_cbrtf8_u10(__m256 a);
__m256 Sleef_cbrtf8_u10avx(__m256 a);
__m256 Sleef_cbrtf8_u10fma4(__m256 a);
__m256 Sleef_cbrtf8_u10avx2(__m256 a);

__m512 Sleef_cbrtf16_u10(__m512 a);
__m512 Sleef_cbrtf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cbrtf_u10 with the same accuracy specification.


Vectorized double precision cubic root functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_cbrtd2_u35(__m128d a);
__m128d Sleef_cbrtd2_u35sse2(__m128d a);
__m128d Sleef_cbrtd2_u35sse4(__m128d a);
__m128d Sleef_cbrtd2_u35avx2128(__m128d a);

__m256d Sleef_cbrtd4_u35(__m256d a);
__m256d Sleef_cbrtd4_u35avx(__m256d a);
__m256d Sleef_cbrtd4_u35fma4(__m256d a);
__m256d Sleef_cbrtd4_u35avx2(__m256d a);

__m512d Sleef_cbrtd8_u35(__m512d a);
__m512d Sleef_cbrtd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cbrt_u35 with the same accuracy specification.


Vectorized single precision cubic root functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_cbrtf4_u35(__m128 a);
__m128 Sleef_cbrtf4_u35sse2(__m128 a);
__m128 Sleef_cbrtf4_u35sse4(__m128 a);
__m128 Sleef_cbrtf4_u35avx2128(__m128 a);

__m256 Sleef_cbrtf8_u35(__m256 a);
__m256 Sleef_cbrtf8_u35avx(__m256 a);
__m256 Sleef_cbrtf8_u35fma4(__m256 a);
__m256 Sleef_cbrtf8_u35avx2(__m256 a);

__m512 Sleef_cbrtf16_u35(__m512 a);
__m512 Sleef_cbrtf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cbrtf_u35 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance functions with 0.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_hypotd2_u05(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u05sse2(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u05sse4(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u05avx2128(__m128d a, __m128d b);

__m256d Sleef_hypotd4_u05(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u05avx(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u05fma4(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u05avx2(__m256d a, __m256d b);

__m512d Sleef_hypotd8_u05(__m512d a, __m512d b);
__m512d Sleef_hypotd8_u05avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_hypot_u05 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance functions with 0.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_hypotf4_u05(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u05sse2(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u05sse4(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u05avx2128(__m128 a, __m128 b);

__m256 Sleef_hypotf8_u05(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u05avx(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u05fma4(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u05avx2(__m256 a, __m256 b);

__m512 Sleef_hypotf16_u05(__m512 a, __m512 b);
__m512 Sleef_hypotf16_u05avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_hypotf_u05 with the same accuracy specification.


Vectorized double precision 2D Euclidean distance functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_hypotd2_u35(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u35sse2(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u35sse4(__m128d a, __m128d b);
__m128d Sleef_hypotd2_u35avx2128(__m128d a, __m128d b);

__m256d Sleef_hypotd4_u35(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u35avx(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u35fma4(__m256d a, __m256d b);
__m256d Sleef_hypotd4_u35avx2(__m256d a, __m256d b);

__m512d Sleef_hypotd8_u35(__m512d a, __m512d b);
__m512d Sleef_hypotd8_u35avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_hypot_u35 with the same accuracy specification.


Vectorized single precision 2D Euclidean distance functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_hypotf4_u35(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u35sse2(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u35sse4(__m128 a, __m128 b);
__m128 Sleef_hypotf4_u35avx2128(__m128 a, __m128 b);

__m256 Sleef_hypotf8_u35(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u35avx(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u35fma4(__m256 a, __m256 b);
__m256 Sleef_hypotf8_u35avx2(__m256 a, __m256 b);

__m512 Sleef_hypotf16_u35(__m512 a, __m512 b);
__m512 Sleef_hypotf16_u35avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_hypotf_u35 with the same accuracy specification.

Inverse Trigonometric Functions

Vectorized double precision arc sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_asind2_u10(__m128d a);
__m128d Sleef_asind2_u10sse2(__m128d a);
__m128d Sleef_asind2_u10sse4(__m128d a);
__m128d Sleef_asind2_u10avx2128(__m128d a);

__m256d Sleef_asind4_u10(__m256d a);
__m256d Sleef_asind4_u10avx(__m256d a);
__m256d Sleef_asind4_u10fma4(__m256d a);
__m256d Sleef_asind4_u10avx2(__m256d a);

__m512d Sleef_asind8_u10(__m512d a);
__m512d Sleef_asind8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asin_u10 with the same accuracy specification.


Vectorized single precision arc sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_asinf4_u10(__m128 a);
__m128 Sleef_asinf4_u10sse2(__m128 a);
__m128 Sleef_asinf4_u10sse4(__m128 a);
__m128 Sleef_asinf4_u10avx2128(__m128 a);

__m256 Sleef_asinf8_u10(__m256 a);
__m256 Sleef_asinf8_u10avx(__m256 a);
__m256 Sleef_asinf8_u10fma4(__m256 a);
__m256 Sleef_asinf8_u10avx2(__m256 a);

__m512 Sleef_asinf16_u10(__m512 a);
__m512 Sleef_asinf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asinf_u10 with the same accuracy specification.


Vectorized double precision arc sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_asind2_u35(__m128d a);
__m128d Sleef_asind2_u35sse2(__m128d a);
__m128d Sleef_asind2_u35sse4(__m128d a);
__m128d Sleef_asind2_u35avx2128(__m128d a);

__m256d Sleef_asind4_u35(__m256d a);
__m256d Sleef_asind4_u35avx(__m256d a);
__m256d Sleef_asind4_u35fma4(__m256d a);
__m256d Sleef_asind4_u35avx2(__m256d a);

__m512d Sleef_asind8_u35(__m512d a);
__m512d Sleef_asind8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asin_u35 with the same accuracy specification.


Vectorized single precision arc sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_asinf4_u35(__m128 a);
__m128 Sleef_asinf4_u35sse2(__m128 a);
__m128 Sleef_asinf4_u35sse4(__m128 a);
__m128 Sleef_asinf4_u35avx2128(__m128 a);

__m256 Sleef_asinf8_u35(__m256 a);
__m256 Sleef_asinf8_u35avx(__m256 a);
__m256 Sleef_asinf8_u35fma4(__m256 a);
__m256 Sleef_asinf8_u35avx2(__m256 a);

__m512 Sleef_asinf16_u35(__m512 a);
__m512 Sleef_asinf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asinf_u35 with the same accuracy specification.


Vectorized double precision arc cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_acosd2_u10(__m128d a);
__m128d Sleef_acosd2_u10sse2(__m128d a);
__m128d Sleef_acosd2_u10sse4(__m128d a);
__m128d Sleef_acosd2_u10avx2128(__m128d a);

__m256d Sleef_acosd4_u10(__m256d a);
__m256d Sleef_acosd4_u10avx(__m256d a);
__m256d Sleef_acosd4_u10fma4(__m256d a);
__m256d Sleef_acosd4_u10avx2(__m256d a);

__m512d Sleef_acosd8_u10(__m512d a);
__m512d Sleef_acosd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acos_u10 with the same accuracy specification.


Vectorized single precision arc cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_acosf4_u10(__m128 a);
__m128 Sleef_acosf4_u10sse2(__m128 a);
__m128 Sleef_acosf4_u10sse4(__m128 a);
__m128 Sleef_acosf4_u10avx2128(__m128 a);

__m256 Sleef_acosf8_u10(__m256 a);
__m256 Sleef_acosf8_u10avx(__m256 a);
__m256 Sleef_acosf8_u10fma4(__m256 a);
__m256 Sleef_acosf8_u10avx2(__m256 a);

__m512 Sleef_acosf16_u10(__m512 a);
__m512 Sleef_acosf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acosf_u10 with the same accuracy specification.


Vectorized double precision arc cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_acosd2_u35(__m128d a);
__m128d Sleef_acosd2_u35sse2(__m128d a);
__m128d Sleef_acosd2_u35sse4(__m128d a);
__m128d Sleef_acosd2_u35avx2128(__m128d a);

__m256d Sleef_acosd4_u35(__m256d a);
__m256d Sleef_acosd4_u35avx(__m256d a);
__m256d Sleef_acosd4_u35fma4(__m256d a);
__m256d Sleef_acosd4_u35avx2(__m256d a);

__m512d Sleef_acosd8_u35(__m512d a);
__m512d Sleef_acosd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acos_u35 with the same accuracy specification.


Vectorized single precision arc cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_acosf4_u35(__m128 a);
__m128 Sleef_acosf4_u35sse2(__m128 a);
__m128 Sleef_acosf4_u35sse4(__m128 a);
__m128 Sleef_acosf4_u35avx2128(__m128 a);

__m256 Sleef_acosf8_u35(__m256 a);
__m256 Sleef_acosf8_u35avx(__m256 a);
__m256 Sleef_acosf8_u35fma4(__m256 a);
__m256 Sleef_acosf8_u35avx2(__m256 a);

__m512 Sleef_acosf16_u35(__m512 a);
__m512 Sleef_acosf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acosf_u35 with the same accuracy specification.


Vectorized double precision arc tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_atand2_u10(__m128d a);
__m128d Sleef_atand2_u10sse2(__m128d a);
__m128d Sleef_atand2_u10sse4(__m128d a);
__m128d Sleef_atand2_u10avx2128(__m128d a);

__m256d Sleef_atand4_u10(__m256d a);
__m256d Sleef_atand4_u10avx(__m256d a);
__m256d Sleef_atand4_u10fma4(__m256d a);
__m256d Sleef_atand4_u10avx2(__m256d a);

__m512d Sleef_atand8_u10(__m512d a);
__m512d Sleef_atand8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan_u10 with the same accuracy specification.


Vectorized single precision arc tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_atanf4_u10(__m128 a);
__m128 Sleef_atanf4_u10sse2(__m128 a);
__m128 Sleef_atanf4_u10sse4(__m128 a);
__m128 Sleef_atanf4_u10avx2128(__m128 a);

__m256 Sleef_atanf8_u10(__m256 a);
__m256 Sleef_atanf8_u10avx(__m256 a);
__m256 Sleef_atanf8_u10fma4(__m256 a);
__m256 Sleef_atanf8_u10avx2(__m256 a);

__m512 Sleef_atanf16_u10(__m512 a);
__m512 Sleef_atanf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atanf_u10 with the same accuracy specification.


Vectorized double precision arc tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_atand2_u35(__m128d a);
__m128d Sleef_atand2_u35sse2(__m128d a);
__m128d Sleef_atand2_u35sse4(__m128d a);
__m128d Sleef_atand2_u35avx2128(__m128d a);

__m256d Sleef_atand4_u35(__m256d a);
__m256d Sleef_atand4_u35avx(__m256d a);
__m256d Sleef_atand4_u35fma4(__m256d a);
__m256d Sleef_atand4_u35avx2(__m256d a);

__m512d Sleef_atand8_u35(__m512d a);
__m512d Sleef_atand8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan_u35 with the same accuracy specification.


Vectorized single precision arc tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_atanf4_u35(__m128 a);
__m128 Sleef_atanf4_u35sse2(__m128 a);
__m128 Sleef_atanf4_u35sse4(__m128 a);
__m128 Sleef_atanf4_u35avx2128(__m128 a);

__m256 Sleef_atanf8_u35(__m256 a);
__m256 Sleef_atanf8_u35avx(__m256 a);
__m256 Sleef_atanf8_u35fma4(__m256 a);
__m256 Sleef_atanf8_u35avx2(__m256 a);

__m512 Sleef_atanf16_u35(__m512 a);
__m512 Sleef_atanf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atanf_u35 with the same accuracy specification.


Vectorized double precision arc tangent functions of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_atan2d2_u10(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u10sse2(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u10sse4(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u10avx2128(__m128d a, __m128d b);

__m256d Sleef_atan2d4_u10(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u10avx(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u10fma4(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u10avx2(__m256d a, __m256d b);

__m512d Sleef_atan2d8_u10(__m512d a, __m512d b);
__m512d Sleef_atan2d8_u10avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan2_u10 with the same accuracy specification.


Vectorized single precision arc tangent functions of two variables with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_atan2f4_u10(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u10sse2(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u10sse4(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u10avx2128(__m128 a, __m128 b);

__m256 Sleef_atan2f8_u10(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u10avx(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u10fma4(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u10avx2(__m256 a, __m256 b);

__m512 Sleef_atan2f16_u10(__m512 a, __m512 b);
__m512 Sleef_atan2f16_u10avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan2f_u10 with the same accuracy specification.


Vectorized double precision arc tangent functions of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_atan2d2_u35(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u35sse2(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u35sse4(__m128d a, __m128d b);
__m128d Sleef_atan2d2_u35avx2128(__m128d a, __m128d b);

__m256d Sleef_atan2d4_u35(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u35avx(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u35fma4(__m256d a, __m256d b);
__m256d Sleef_atan2d4_u35avx2(__m256d a, __m256d b);

__m512d Sleef_atan2d8_u35(__m512d a, __m512d b);
__m512d Sleef_atan2d8_u35avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan2_u35 with the same accuracy specification.


Vectorized single precision arc tangent functions of two variables with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_atan2f4_u35(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u35sse2(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u35sse4(__m128 a, __m128 b);
__m128 Sleef_atan2f4_u35avx2128(__m128 a, __m128 b);

__m256 Sleef_atan2f8_u35(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u35avx(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u35fma4(__m256 a, __m256 b);
__m256 Sleef_atan2f8_u35avx2(__m256 a, __m256 b);

__m512 Sleef_atan2f16_u35(__m512 a, __m512 b);
__m512 Sleef_atan2f16_u35avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atan2f_u35 with the same accuracy specification.

Hyperbolic functions and inverse hyperbolic functions

Vectorized double precision hyperbolic sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_sinhd2_u10(__m128d a);
__m128d Sleef_sinhd2_u10sse2(__m128d a);
__m128d Sleef_sinhd2_u10sse4(__m128d a);
__m128d Sleef_sinhd2_u10avx2128(__m128d a);

__m256d Sleef_sinhd4_u10(__m256d a);
__m256d Sleef_sinhd4_u10avx(__m256d a);
__m256d Sleef_sinhd4_u10fma4(__m256d a);
__m256d Sleef_sinhd4_u10avx2(__m256d a);

__m512d Sleef_sinhd8_u10(__m512d a);
__m512d Sleef_sinhd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic sine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_sinhf4_u10(__m128 a);
__m128 Sleef_sinhf4_u10sse2(__m128 a);
__m128 Sleef_sinhf4_u10sse4(__m128 a);
__m128 Sleef_sinhf4_u10avx2128(__m128 a);

__m256 Sleef_sinhf8_u10(__m256 a);
__m256 Sleef_sinhf8_u10avx(__m256 a);
__m256 Sleef_sinhf8_u10fma4(__m256 a);
__m256 Sleef_sinhf8_u10avx2(__m256 a);

__m512 Sleef_sinhf16_u10(__m512 a);
__m512 Sleef_sinhf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_sinhd2_u35(__m128d a);
__m128d Sleef_sinhd2_u35sse2(__m128d a);
__m128d Sleef_sinhd2_u35sse4(__m128d a);
__m128d Sleef_sinhd2_u35avx2128(__m128d a);

__m256d Sleef_sinhd4_u35(__m256d a);
__m256d Sleef_sinhd4_u35avx(__m256d a);
__m256d Sleef_sinhd4_u35fma4(__m256d a);
__m256d Sleef_sinhd4_u35avx2(__m256d a);

__m512d Sleef_sinhd8_u35(__m512d a);
__m512d Sleef_sinhd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic sine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_sinhf4_u35(__m128 a);
__m128 Sleef_sinhf4_u35sse2(__m128 a);
__m128 Sleef_sinhf4_u35sse4(__m128 a);
__m128 Sleef_sinhf4_u35avx2128(__m128 a);

__m256 Sleef_sinhf8_u35(__m256 a);
__m256 Sleef_sinhf8_u35avx(__m256 a);
__m256 Sleef_sinhf8_u35fma4(__m256 a);
__m256 Sleef_sinhf8_u35avx2(__m256 a);

__m512 Sleef_sinhf16_u35(__m512 a);
__m512 Sleef_sinhf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_sinhf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_coshd2_u10(__m128d a);
__m128d Sleef_coshd2_u10sse2(__m128d a);
__m128d Sleef_coshd2_u10sse4(__m128d a);
__m128d Sleef_coshd2_u10avx2128(__m128d a);

__m256d Sleef_coshd4_u10(__m256d a);
__m256d Sleef_coshd4_u10avx(__m256d a);
__m256d Sleef_coshd4_u10fma4(__m256d a);
__m256d Sleef_coshd4_u10avx2(__m256d a);

__m512d Sleef_coshd8_u10(__m512d a);
__m512d Sleef_coshd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cosh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic cosine functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_coshf4_u10(__m128 a);
__m128 Sleef_coshf4_u10sse2(__m128 a);
__m128 Sleef_coshf4_u10sse4(__m128 a);
__m128 Sleef_coshf4_u10avx2128(__m128 a);

__m256 Sleef_coshf8_u10(__m256 a);
__m256 Sleef_coshf8_u10avx(__m256 a);
__m256 Sleef_coshf8_u10fma4(__m256 a);
__m256 Sleef_coshf8_u10avx2(__m256 a);

__m512 Sleef_coshf16_u10(__m512 a);
__m512 Sleef_coshf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_coshf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_coshd2_u35(__m128d a);
__m128d Sleef_coshd2_u35sse2(__m128d a);
__m128d Sleef_coshd2_u35sse4(__m128d a);
__m128d Sleef_coshd2_u35avx2128(__m128d a);

__m256d Sleef_coshd4_u35(__m256d a);
__m256d Sleef_coshd4_u35avx(__m256d a);
__m256d Sleef_coshd4_u35fma4(__m256d a);
__m256d Sleef_coshd4_u35avx2(__m256d a);

__m512d Sleef_coshd8_u35(__m512d a);
__m512d Sleef_coshd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_cosh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic cosine functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_coshf4_u35(__m128 a);
__m128 Sleef_coshf4_u35sse2(__m128 a);
__m128 Sleef_coshf4_u35sse4(__m128 a);
__m128 Sleef_coshf4_u35avx2128(__m128 a);

__m256 Sleef_coshf8_u35(__m256 a);
__m256 Sleef_coshf8_u35avx(__m256 a);
__m256 Sleef_coshf8_u35fma4(__m256 a);
__m256 Sleef_coshf8_u35avx2(__m256 a);

__m512 Sleef_coshf16_u35(__m512 a);
__m512 Sleef_coshf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_coshf_u35 with the same accuracy specification.


Vectorized double precision hyperbolic tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_tanhd2_u10(__m128d a);
__m128d Sleef_tanhd2_u10sse2(__m128d a);
__m128d Sleef_tanhd2_u10sse4(__m128d a);
__m128d Sleef_tanhd2_u10avx2128(__m128d a);

__m256d Sleef_tanhd4_u10(__m256d a);
__m256d Sleef_tanhd4_u10avx(__m256d a);
__m256d Sleef_tanhd4_u10fma4(__m256d a);
__m256d Sleef_tanhd4_u10avx2(__m256d a);

__m512d Sleef_tanhd8_u10(__m512d a);
__m512d Sleef_tanhd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanh_u10 with the same accuracy specification.


Vectorized single precision hyperbolic tangent functions with 1.0 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_tanhf4_u10(__m128 a);
__m128 Sleef_tanhf4_u10sse2(__m128 a);
__m128 Sleef_tanhf4_u10sse4(__m128 a);
__m128 Sleef_tanhf4_u10avx2128(__m128 a);

__m256 Sleef_tanhf8_u10(__m256 a);
__m256 Sleef_tanhf8_u10avx(__m256 a);
__m256 Sleef_tanhf8_u10fma4(__m256 a);
__m256 Sleef_tanhf8_u10avx2(__m256 a);

__m512 Sleef_tanhf16_u10(__m512 a);
__m512 Sleef_tanhf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanhf_u10 with the same accuracy specification.


Vectorized double precision hyperbolic tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128d Sleef_tanhd2_u35(__m128d a);
__m128d Sleef_tanhd2_u35sse2(__m128d a);
__m128d Sleef_tanhd2_u35sse4(__m128d a);
__m128d Sleef_tanhd2_u35avx2128(__m128d a);

__m256d Sleef_tanhd4_u35(__m256d a);
__m256d Sleef_tanhd4_u35avx(__m256d a);
__m256d Sleef_tanhd4_u35fma4(__m256d a);
__m256d Sleef_tanhd4_u35avx2(__m256d a);

__m512d Sleef_tanhd8_u35(__m512d a);
__m512d Sleef_tanhd8_u35avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanh_u35 with the same accuracy specification.


Vectorized single precision hyperbolic tangent functions with 3.5 ULP error bound

Synopsis

#include <sleef.h>

__m128 Sleef_tanhf4_u35(__m128 a);
__m128 Sleef_tanhf4_u35sse2(__m128 a);
__m128 Sleef_tanhf4_u35sse4(__m128 a);
__m128 Sleef_tanhf4_u35avx2128(__m128 a);

__m256 Sleef_tanhf8_u35(__m256 a);
__m256 Sleef_tanhf8_u35avx(__m256 a);
__m256 Sleef_tanhf8_u35fma4(__m256 a);
__m256 Sleef_tanhf8_u35avx2(__m256 a);

__m512 Sleef_tanhf16_u35(__m512 a);
__m512 Sleef_tanhf16_u35avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tanhf_u35 with the same accuracy specification.


Vectorized double precision inverse hyperbolic sine functions

Synopsis

#include <sleef.h>

__m128d Sleef_asinhd2_u10(__m128d a);
__m128d Sleef_asinhd2_u10sse2(__m128d a);
__m128d Sleef_asinhd2_u10sse4(__m128d a);
__m128d Sleef_asinhd2_u10avx2128(__m128d a);

__m256d Sleef_asinhd4_u10(__m256d a);
__m256d Sleef_asinhd4_u10avx(__m256d a);
__m256d Sleef_asinhd4_u10fma4(__m256d a);
__m256d Sleef_asinhd4_u10avx2(__m256d a);

__m512d Sleef_asinhd8_u10(__m512d a);
__m512d Sleef_asinhd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asinh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic sine functions

Synopsis

#include <sleef.h>

__m128 Sleef_asinhf4_u10(__m128 a);
__m128 Sleef_asinhf4_u10sse2(__m128 a);
__m128 Sleef_asinhf4_u10sse4(__m128 a);
__m128 Sleef_asinhf4_u10avx2128(__m128 a);

__m256 Sleef_asinhf8_u10(__m256 a);
__m256 Sleef_asinhf8_u10avx(__m256 a);
__m256 Sleef_asinhf8_u10fma4(__m256 a);
__m256 Sleef_asinhf8_u10avx2(__m256 a);

__m512 Sleef_asinhf16_u10(__m512 a);
__m512 Sleef_asinhf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_asinhf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic cosine functions

Synopsis

#include <sleef.h>

__m128d Sleef_acoshd2_u10(__m128d a);
__m128d Sleef_acoshd2_u10sse2(__m128d a);
__m128d Sleef_acoshd2_u10sse4(__m128d a);
__m128d Sleef_acoshd2_u10avx2128(__m128d a);

__m256d Sleef_acoshd4_u10(__m256d a);
__m256d Sleef_acoshd4_u10avx(__m256d a);
__m256d Sleef_acoshd4_u10fma4(__m256d a);
__m256d Sleef_acoshd4_u10avx2(__m256d a);

__m512d Sleef_acoshd8_u10(__m512d a);
__m512d Sleef_acoshd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acosh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic cosine functions

Synopsis

#include <sleef.h>

__m128 Sleef_acoshf4_u10(__m128 a);
__m128 Sleef_acoshf4_u10sse2(__m128 a);
__m128 Sleef_acoshf4_u10sse4(__m128 a);
__m128 Sleef_acoshf4_u10avx2128(__m128 a);

__m256 Sleef_acoshf8_u10(__m256 a);
__m256 Sleef_acoshf8_u10avx(__m256 a);
__m256 Sleef_acoshf8_u10fma4(__m256 a);
__m256 Sleef_acoshf8_u10avx2(__m256 a);

__m512 Sleef_acoshf16_u10(__m512 a);
__m512 Sleef_acoshf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_acoshf_u10 with the same accuracy specification.


Vectorized double precision inverse hyperbolic tangent functions

Synopsis

#include <sleef.h>

__m128d Sleef_atanhd2_u10(__m128d a);
__m128d Sleef_atanhd2_u10sse2(__m128d a);
__m128d Sleef_atanhd2_u10sse4(__m128d a);
__m128d Sleef_atanhd2_u10avx2128(__m128d a);

__m256d Sleef_atanhd4_u10(__m256d a);
__m256d Sleef_atanhd4_u10avx(__m256d a);
__m256d Sleef_atanhd4_u10fma4(__m256d a);
__m256d Sleef_atanhd4_u10avx2(__m256d a);

__m512d Sleef_atanhd8_u10(__m512d a);
__m512d Sleef_atanhd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atanh_u10 with the same accuracy specification.


Vectorized single precision inverse hyperbolic tangent functions

Synopsis

#include <sleef.h>

__m128 Sleef_atanhf4_u10(__m128 a);
__m128 Sleef_atanhf4_u10sse2(__m128 a);
__m128 Sleef_atanhf4_u10sse4(__m128 a);
__m128 Sleef_atanhf4_u10avx2128(__m128 a);

__m256 Sleef_atanhf8_u10(__m256 a);
__m256 Sleef_atanhf8_u10avx(__m256 a);
__m256 Sleef_atanhf8_u10fma4(__m256 a);
__m256 Sleef_atanhf8_u10avx2(__m256 a);

__m512 Sleef_atanhf16_u10(__m512 a);
__m512 Sleef_atanhf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_atanhf_u10 with the same accuracy specification.

Error and gamma functions

Vectorized double precision error functions

Synopsis

#include <sleef.h>

__m128d Sleef_erfd2_u10(__m128d a);
__m128d Sleef_erfd2_u10sse2(__m128d a);
__m128d Sleef_erfd2_u10sse4(__m128d a);
__m128d Sleef_erfd2_u10avx2128(__m128d a);

__m256d Sleef_erfd4_u10(__m256d a);
__m256d Sleef_erfd4_u10avx(__m256d a);
__m256d Sleef_erfd4_u10fma4(__m256d a);
__m256d Sleef_erfd4_u10avx2(__m256d a);

__m512d Sleef_erfd8_u10(__m512d a);
__m512d Sleef_erfd8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_erf_u10 with the same accuracy specification.


Vectorized single precision error functions

Synopsis

#include <sleef.h>

__m128 Sleef_erff4_u10(__m128 a);
__m128 Sleef_erff4_u10sse2(__m128 a);
__m128 Sleef_erff4_u10sse4(__m128 a);
__m128 Sleef_erff4_u10avx2128(__m128 a);

__m256 Sleef_erff8_u10(__m256 a);
__m256 Sleef_erff8_u10avx(__m256 a);
__m256 Sleef_erff8_u10fma4(__m256 a);
__m256 Sleef_erff8_u10avx2(__m256 a);

__m512 Sleef_erff16_u10(__m512 a);
__m512 Sleef_erff16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_erff_u10 with the same accuracy specification.


Vectorized double precision complementary error functions

Synopsis

#include <sleef.h>

__m128d Sleef_erfcd2_u15(__m128d a);
__m128d Sleef_erfcd2_u15sse2(__m128d a);
__m128d Sleef_erfcd2_u15sse4(__m128d a);
__m128d Sleef_erfcd2_u15avx2128(__m128d a);

__m256d Sleef_erfcd4_u15(__m256d a);
__m256d Sleef_erfcd4_u15avx(__m256d a);
__m256d Sleef_erfcd4_u15fma4(__m256d a);
__m256d Sleef_erfcd4_u15avx2(__m256d a);

__m512d Sleef_erfcd8_u15(__m512d a);
__m512d Sleef_erfcd8_u15avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_erfc_u15 with the same accuracy specification.


Vectorized single precision complementary error functions

Synopsis

#include <sleef.h>

__m128 Sleef_erfcf4_u15(__m128 a);
__m128 Sleef_erfcf4_u15sse2(__m128 a);
__m128 Sleef_erfcf4_u15sse4(__m128 a);
__m128 Sleef_erfcf4_u15avx2128(__m128 a);

__m256 Sleef_erfcf8_u15(__m256 a);
__m256 Sleef_erfcf8_u15avx(__m256 a);
__m256 Sleef_erfcf8_u15fma4(__m256 a);
__m256 Sleef_erfcf8_u15avx2(__m256 a);

__m512 Sleef_erfcf16_u15(__m512 a);
__m512 Sleef_erfcf16_u15avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_erfcf_u15 with the same accuracy specification.


Vectorized double precision gamma functions

Synopsis

#include <sleef.h>

__m128d Sleef_tgammad2_u10(__m128d a);
__m128d Sleef_tgammad2_u10sse2(__m128d a);
__m128d Sleef_tgammad2_u10sse4(__m128d a);
__m128d Sleef_tgammad2_u10avx2128(__m128d a);

__m256d Sleef_tgammad4_u10(__m256d a);
__m256d Sleef_tgammad4_u10avx(__m256d a);
__m256d Sleef_tgammad4_u10fma4(__m256d a);
__m256d Sleef_tgammad4_u10avx2(__m256d a);

__m512d Sleef_tgammad8_u10(__m512d a);
__m512d Sleef_tgammad8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tgamma_u10 with the same accuracy specification.


Vectorized single precision gamma functions

Synopsis

#include <sleef.h>

__m128 Sleef_tgammaf4_u10(__m128 a);
__m128 Sleef_tgammaf4_u10sse2(__m128 a);
__m128 Sleef_tgammaf4_u10sse4(__m128 a);
__m128 Sleef_tgammaf4_u10avx2128(__m128 a);

__m256 Sleef_tgammaf8_u10(__m256 a);
__m256 Sleef_tgammaf8_u10avx(__m256 a);
__m256 Sleef_tgammaf8_u10fma4(__m256 a);
__m256 Sleef_tgammaf8_u10avx2(__m256 a);

__m512 Sleef_tgammaf16_u10(__m512 a);
__m512 Sleef_tgammaf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_tgammaf_u10 with the same accuracy specification.


Vectorized double precision log gamma functions

Synopsis

#include <sleef.h>

__m128d Sleef_lgammad2_u10(__m128d a);
__m128d Sleef_lgammad2_u10sse2(__m128d a);
__m128d Sleef_lgammad2_u10sse4(__m128d a);
__m128d Sleef_lgammad2_u10avx2128(__m128d a);

__m256d Sleef_lgammad4_u10(__m256d a);
__m256d Sleef_lgammad4_u10avx(__m256d a);
__m256d Sleef_lgammad4_u10fma4(__m256d a);
__m256d Sleef_lgammad4_u10avx2(__m256d a);

__m512d Sleef_lgammad8_u10(__m512d a);
__m512d Sleef_lgammad8_u10avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_lgamma_u10 with the same accuracy specification.


Vectorized single precision log gamma functions

Synopsis

#include <sleef.h>

__m128 Sleef_lgammaf4_u10(__m128 a);
__m128 Sleef_lgammaf4_u10sse2(__m128 a);
__m128 Sleef_lgammaf4_u10sse4(__m128 a);
__m128 Sleef_lgammaf4_u10avx2128(__m128 a);

__m256 Sleef_lgammaf8_u10(__m256 a);
__m256 Sleef_lgammaf8_u10avx(__m256 a);
__m256 Sleef_lgammaf8_u10fma4(__m256 a);
__m256 Sleef_lgammaf8_u10avx2(__m256 a);

__m512 Sleef_lgammaf16_u10(__m512 a);
__m512 Sleef_lgammaf16_u10avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_lgammaf_u10 with the same accuracy specification.

Nearest integer functions

Vectorized double precision functions for rounding to integer towards zero

Synopsis

#include <sleef.h>

__m128d Sleef_truncd2(__m128d a);
__m128d Sleef_truncd2_sse2(__m128d a);
__m128d Sleef_truncd2_sse4(__m128d a);
__m128d Sleef_truncd2_avx2128(__m128d a);

__m256d Sleef_truncd4(__m256d a);
__m256d Sleef_truncd4_avx(__m256d a);
__m256d Sleef_truncd4_fma4(__m256d a);
__m256d Sleef_truncd4_avx2(__m256d a);

__m512d Sleef_truncd8(__m512d a);
__m512d Sleef_truncd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_trunc with the same accuracy specification.


Vectorized single precision functions for rounding to integer towards zero

Synopsis

#include <sleef.h>

__m128 Sleef_truncf4(__m128 a);
__m128 Sleef_truncf4_sse2(__m128 a);
__m128 Sleef_truncf4_sse4(__m128 a);
__m128 Sleef_truncf4_avx2128(__m128 a);

__m256 Sleef_truncf8(__m256 a);
__m256 Sleef_truncf8_avx(__m256 a);
__m256 Sleef_truncf8_fma4(__m256 a);
__m256 Sleef_truncf8_avx2(__m256 a);

__m512 Sleef_truncf16(__m512 a);
__m512 Sleef_truncf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_truncf with the same accuracy specification.


Vectorized double precision functions for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

__m128d Sleef_floord2(__m128d a);
__m128d Sleef_floord2_sse2(__m128d a);
__m128d Sleef_floord2_sse4(__m128d a);
__m128d Sleef_floord2_avx2128(__m128d a);

__m256d Sleef_floord4(__m256d a);
__m256d Sleef_floord4_avx(__m256d a);
__m256d Sleef_floord4_fma4(__m256d a);
__m256d Sleef_floord4_avx2(__m256d a);

__m512d Sleef_floord8(__m512d a);
__m512d Sleef_floord8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_floor with the same accuracy specification.


Vectorized single precision functions for rounding to integer towards negative infinity

Synopsis

#include <sleef.h>

__m128 Sleef_floorf4(__m128 a);
__m128 Sleef_floorf4_sse2(__m128 a);
__m128 Sleef_floorf4_sse4(__m128 a);
__m128 Sleef_floorf4_avx2128(__m128 a);

__m256 Sleef_floorf8(__m256 a);
__m256 Sleef_floorf8_avx(__m256 a);
__m256 Sleef_floorf8_fma4(__m256 a);
__m256 Sleef_floorf8_avx2(__m256 a);

__m512 Sleef_floorf16(__m512 a);
__m512 Sleef_floorf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_floorf with the same accuracy specification.


Vectorized double precision functions for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

__m128d Sleef_ceild2(__m128d a);
__m128d Sleef_ceild2_sse2(__m128d a);
__m128d Sleef_ceild2_sse4(__m128d a);
__m128d Sleef_ceild2_avx2128(__m128d a);

__m256d Sleef_ceild4(__m256d a);
__m256d Sleef_ceild4_avx(__m256d a);
__m256d Sleef_ceild4_fma4(__m256d a);
__m256d Sleef_ceild4_avx2(__m256d a);

__m512d Sleef_ceild8(__m512d a);
__m512d Sleef_ceild8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_ceil with the same accuracy specification.


Vectorized single precision functions for rounding to integer towards positive infinity

Synopsis

#include <sleef.h>

__m128 Sleef_ceilf4(__m128 a);
__m128 Sleef_ceilf4_sse2(__m128 a);
__m128 Sleef_ceilf4_sse4(__m128 a);
__m128 Sleef_ceilf4_avx2128(__m128 a);

__m256 Sleef_ceilf8(__m256 a);
__m256 Sleef_ceilf8_avx(__m256 a);
__m256 Sleef_ceilf8_fma4(__m256 a);
__m256 Sleef_ceilf8_avx2(__m256 a);

__m512 Sleef_ceilf16(__m512 a);
__m512 Sleef_ceilf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_ceilf with the same accuracy specification.


Vectorized double precision functions for rounding to nearest integer

Synopsis

#include <sleef.h>

__m128d Sleef_roundd2(__m128d a);
__m128d Sleef_roundd2_sse2(__m128d a);
__m128d Sleef_roundd2_sse4(__m128d a);
__m128d Sleef_roundd2_avx2128(__m128d a);

__m256d Sleef_roundd4(__m256d a);
__m256d Sleef_roundd4_avx(__m256d a);
__m256d Sleef_roundd4_fma4(__m256d a);
__m256d Sleef_roundd4_avx2(__m256d a);

__m512d Sleef_roundd8(__m512d a);
__m512d Sleef_roundd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_round with the same accuracy specification.


Vectorized single precision functions for rounding to nearest integer

Synopsis

#include <sleef.h>

__m128 Sleef_roundf4(__m128 a);
__m128 Sleef_roundf4_sse2(__m128 a);
__m128 Sleef_roundf4_sse4(__m128 a);
__m128 Sleef_roundf4_avx2128(__m128 a);

__m256 Sleef_roundf8(__m256 a);
__m256 Sleef_roundf8_avx(__m256 a);
__m256 Sleef_roundf8_fma4(__m256 a);
__m256 Sleef_roundf8_avx2(__m256 a);

__m512 Sleef_roundf16(__m512 a);
__m512 Sleef_roundf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_roundf with the same accuracy specification.


Vectorized double precision functions for rounding to nearest integer

Synopsis

#include <sleef.h>

__m128d Sleef_rintd2(__m128d a);
__m128d Sleef_rintd2_sse2(__m128d a);
__m128d Sleef_rintd2_sse4(__m128d a);
__m128d Sleef_rintd2_avx2128(__m128d a);

__m256d Sleef_rintd4(__m256d a);
__m256d Sleef_rintd4_avx(__m256d a);
__m256d Sleef_rintd4_fma4(__m256d a);
__m256d Sleef_rintd4_avx2(__m256d a);

__m512d Sleef_rintd8(__m512d a);
__m512d Sleef_rintd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_rint with the same accuracy specification.


Vectorized single precision functions for rounding to nearest integer

Synopsis

#include <sleef.h>

__m128 Sleef_rintf4(__m128 a);
__m128 Sleef_rintf4_sse2(__m128 a);
__m128 Sleef_rintf4_sse4(__m128 a);
__m128 Sleef_rintf4_avx2128(__m128 a);

__m256 Sleef_rintf8(__m256 a);
__m256 Sleef_rintf8_avx(__m256 a);
__m256 Sleef_rintf8_fma4(__m256 a);
__m256 Sleef_rintf8_avx2(__m256 a);

__m512 Sleef_rintf16(__m512 a);
__m512 Sleef_rintf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_rintf with the same accuracy specification.

Other functions

Vectorized double precision functions for fused multiply-accumulation

Synopsis

#include <sleef.h>

__m128d Sleef_fmad2(__m128d a, __m128d b, __m128d c);
__m128d Sleef_fmad2_sse2(__m128d a, __m128d b, __m128d c);
__m128d Sleef_fmad2_sse4(__m128d a, __m128d b, __m128d c);
__m128d Sleef_fmad2_avx2128(__m128d a, __m128d b, __m128d c);

__m256d Sleef_fmad4(__m256d a, __m256d b, __m256d c);
__m256d Sleef_fmad4_avx(__m256d a, __m256d b, __m256d c);
__m256d Sleef_fmad4_fma4(__m256d a, __m256d b, __m256d c);
__m256d Sleef_fmad4_avx2(__m256d a, __m256d b, __m256d c);

__m512d Sleef_fmad8(__m512d a, __m512d b, __m512d c);
__m512d Sleef_fmad8_avx512f(__m512d a, __m512d b, __m512d c);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fma with the same accuracy specification.


Vectorized single precision functions for fused multiply-accumulation

Synopsis

#include <sleef.h>

__m128 Sleef_fmaf4(__m128 a, __m128 b, __m128 c);
__m128 Sleef_fmaf4_sse2(__m128 a, __m128 b, __m128 c);
__m128 Sleef_fmaf4_sse4(__m128 a, __m128 b, __m128 c);
__m128 Sleef_fmaf4_avx2128(__m128 a, __m128 b, __m128 c);

__m256 Sleef_fmaf8(__m256 a, __m256 b, __m256 c);
__m256 Sleef_fmaf8_avx(__m256 a, __m256 b, __m256 c);
__m256 Sleef_fmaf8_fma4(__m256 a, __m256 b, __m256 c);
__m256 Sleef_fmaf8_avx2(__m256 a, __m256 b, __m256 c);

__m512 Sleef_fmaf16(__m512 a, __m512 b, __m512 c);
__m512 Sleef_fmaf16_avx512f(__m512 a, __m512 b, __m512 c);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmaf with the same accuracy specification.


Vectorized double precision functions for computing FP remainder

Synopsis

#include <sleef.h>

__m128d Sleef_fmodd2(__m128d a, __m128d b);
__m128d Sleef_fmodd2_sse2(__m128d a, __m128d b);
__m128d Sleef_fmodd2_sse4(__m128d a, __m128d b);
__m128d Sleef_fmodd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_fmodd4(__m256d a, __m256d b);
__m256d Sleef_fmodd4_avx(__m256d a, __m256d b);
__m256d Sleef_fmodd4_fma4(__m256d a, __m256d b);
__m256d Sleef_fmodd4_avx2(__m256d a, __m256d b);

__m512d Sleef_fmodd8(__m512d a, __m512d b);
__m512d Sleef_fmodd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmod with the same accuracy specification.


Vectorized single precision functions for computing FP remainder

Synopsis

#include <sleef.h>

__m128 Sleef_fmodf4(__m128 a, __m128 b);
__m128 Sleef_fmodf4_sse2(__m128 a, __m128 b);
__m128 Sleef_fmodf4_sse4(__m128 a, __m128 b);
__m128 Sleef_fmodf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_fmodf8(__m256 a, __m256 b);
__m256 Sleef_fmodf8_avx(__m256 a, __m256 b);
__m256 Sleef_fmodf8_fma4(__m256 a, __m256 b);
__m256 Sleef_fmodf8_avx2(__m256 a, __m256 b);

__m512 Sleef_fmodf16(__m512 a, __m512 b);
__m512 Sleef_fmodf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmodf with the same accuracy specification.


Vectorized double precision functions for multiplying by integral power of 2

Synopsis

#include <sleef.h>

__m128d Sleef_ldexpd2(__m128d a, __m128i b);
__m128d Sleef_ldexpd2_sse2(__m128d a, __m128i b);
__m128d Sleef_ldexpd2_sse4(__m128d a, __m128i b);
__m128d Sleef_ldexpd2_avx2128(__m128d a, __m128i b);

__m256d Sleef_ldexpd4(__m256d a, __m128i b);
__m256d Sleef_ldexpd4_avx(__m256d a, __m128i b);
__m256d Sleef_ldexpd4_fma4(__m256d a, __m128i b);
__m256d Sleef_ldexpd4_avx2(__m256d a, __m128i b);

__m512d Sleef_ldexpd8(__m512d a, __m256i b);
__m512d Sleef_ldexpd8_avx512f(__m512d a, __m256i b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_ldexp with the same accuracy specification.


Vectorized double precision functions for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

__m128d Sleef_frfrexpd2(__m128d a);
__m128d Sleef_frfrexpd2_sse2(__m128d a);
__m128d Sleef_frfrexpd2_sse4(__m128d a);
__m128d Sleef_frfrexpd2_avx2128(__m128d a);

__m256d Sleef_frfrexpd4(__m256d a);
__m256d Sleef_frfrexpd4_avx(__m256d a);
__m256d Sleef_frfrexpd4_fma4(__m256d a);
__m256d Sleef_frfrexpd4_avx2(__m256d a);

__m512d Sleef_frfrexpd8(__m512d a);
__m512d Sleef_frfrexpd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_frfrexp with the same accuracy specification.


Vectorized single precision functions for obtaining fractional component of an FP number

Synopsis

#include <sleef.h>

__m128 Sleef_frfrexpf4(__m128 a);
__m128 Sleef_frfrexpf4_sse2(__m128 a);
__m128 Sleef_frfrexpf4_sse4(__m128 a);
__m128 Sleef_frfrexpf4_avx2128(__m128 a);

__m256 Sleef_frfrexpf8(__m256 a);
__m256 Sleef_frfrexpf8_avx(__m256 a);
__m256 Sleef_frfrexpf8_fma4(__m256 a);
__m256 Sleef_frfrexpf8_avx2(__m256 a);

__m512 Sleef_frfrexpf16(__m512 a);
__m512 Sleef_frfrexpf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_frfrexpf with the same accuracy specification.


Vectorized double precision functions for obtaining integral component of an FP number

Synopsis

#include <sleef.h>

__m128i Sleef_expfrexpd2(__m128d a);
__m128i Sleef_expfrexpd2_sse2(__m128d a);
__m128i Sleef_expfrexpd2_sse4(__m128d a);
__m128i Sleef_expfrexpd2_avx2128(__m128d a);

__m128i Sleef_expfrexpd4(__m256d a);
__m128i Sleef_expfrexpd4_avx(__m256d a);
__m128i Sleef_expfrexpd4_fma4(__m256d a);
__m128i Sleef_expfrexpd4_avx2(__m256d a);

__m256i Sleef_expfrexpd8(__m512d a);
__m256i Sleef_expfrexpd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_expfrexp with the same accuracy specification.


Vectorized double precision functions for getting integer exponent

Synopsis

#include <sleef.h>

__m128i Sleef_ilogbd2(__m128d a);
__m128i Sleef_ilogbd2_sse2(__m128d a);
__m128i Sleef_ilogbd2_sse4(__m128d a);
__m128i Sleef_ilogbd2_avx2128(__m128d a);

__m128i Sleef_ilogbd4(__m256d a);
__m128i Sleef_ilogbd4_avx(__m256d a);
__m128i Sleef_ilogbd4_fma4(__m256d a);
__m128i Sleef_ilogbd4_avx2(__m256d a);

__m256i Sleef_ilogbd8(__m512d a);
__m256i Sleef_ilogbd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_ilogb with the same accuracy specification.


Vectorized double precision functions for obtaining signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef___m128d_2 Sleef_modfd2(__m128d a);
Sleef___m128d_2 Sleef_modfd2_sse2(__m128d a);
Sleef___m128d_2 Sleef_modfd2_sse4(__m128d a);
Sleef___m128d_2 Sleef_modfd2_avx2128(__m128d a);

Sleef___m256d_2 Sleef_modfd4(__m256d a);
Sleef___m256d_2 Sleef_modfd4_avx(__m256d a);
Sleef___m256d_2 Sleef_modfd4_fma4(__m256d a);
Sleef___m256d_2 Sleef_modfd4_avx2(__m256d a);

Sleef___m512d_2 Sleef_modfd8(__m512d a);
Sleef___m512d_2 Sleef_modfd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_modf with the same accuracy specification.


Vectorized single precision functions for obtaining signed integral and fractional values

Synopsis

#include <sleef.h>

Sleef___m128_2 Sleef_modff4(__m128 a);
Sleef___m128_2 Sleef_modff4_sse2(__m128 a);
Sleef___m128_2 Sleef_modff4_sse4(__m128 a);
Sleef___m128_2 Sleef_modff4_avx2128(__m128 a);

Sleef___m256_2 Sleef_modff8(__m256 a);
Sleef___m256_2 Sleef_modff8_avx(__m256 a);
Sleef___m256_2 Sleef_modff8_fma4(__m256 a);
Sleef___m256_2 Sleef_modff8_avx2(__m256 a);

Sleef___m512_2 Sleef_modff16(__m512 a);
Sleef___m512_2 Sleef_modff16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_modff with the same accuracy specification.


Vectorized double precision functions for calculating the absolute value

Synopsis

#include <sleef.h>

__m128d Sleef_fabsd2(__m128d a);
__m128d Sleef_fabsd2_sse2(__m128d a);
__m128d Sleef_fabsd2_sse4(__m128d a);
__m128d Sleef_fabsd2_avx2128(__m128d a);

__m256d Sleef_fabsd4(__m256d a);
__m256d Sleef_fabsd4_avx(__m256d a);
__m256d Sleef_fabsd4_fma4(__m256d a);
__m256d Sleef_fabsd4_avx2(__m256d a);

__m512d Sleef_fabsd8(__m512d a);
__m512d Sleef_fabsd8_avx512f(__m512d a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fabs with the same accuracy specification.


Vectorized single precision functions for calculating the absolute value

Synopsis

#include <sleef.h>

__m128 Sleef_fabsf4(__m128 a);
__m128 Sleef_fabsf4_sse2(__m128 a);
__m128 Sleef_fabsf4_sse4(__m128 a);
__m128 Sleef_fabsf4_avx2128(__m128 a);

__m256 Sleef_fabsf8(__m256 a);
__m256 Sleef_fabsf8_avx(__m256 a);
__m256 Sleef_fabsf8_fma4(__m256 a);
__m256 Sleef_fabsf8_avx2(__m256 a);

__m512 Sleef_fabsf16(__m512 a);
__m512 Sleef_fabsf16_avx512f(__m512 a);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fabsf with the same accuracy specification.


Vectorized double precision functions for copying signs

Synopsis

#include <sleef.h>

__m128d Sleef_copysignd2(__m128d a, __m128d b);
__m128d Sleef_copysignd2_sse2(__m128d a, __m128d b);
__m128d Sleef_copysignd2_sse4(__m128d a, __m128d b);
__m128d Sleef_copysignd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_copysignd4(__m256d a, __m256d b);
__m256d Sleef_copysignd4_avx(__m256d a, __m256d b);
__m256d Sleef_copysignd4_fma4(__m256d a, __m256d b);
__m256d Sleef_copysignd4_avx2(__m256d a, __m256d b);

__m512d Sleef_copysignd8(__m512d a, __m512d b);
__m512d Sleef_copysignd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_copysign with the same accuracy specification.


Vectorized single precision functions for copying signs

Synopsis

#include <sleef.h>

__m128 Sleef_copysignf4(__m128 a, __m128 b);
__m128 Sleef_copysignf4_sse2(__m128 a, __m128 b);
__m128 Sleef_copysignf4_sse4(__m128 a, __m128 b);
__m128 Sleef_copysignf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_copysignf8(__m256 a, __m256 b);
__m256 Sleef_copysignf8_avx(__m256 a, __m256 b);
__m256 Sleef_copysignf8_fma4(__m256 a, __m256 b);
__m256 Sleef_copysignf8_avx2(__m256 a, __m256 b);

__m512 Sleef_copysignf16(__m512 a, __m512 b);
__m512 Sleef_copysignf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_copysignf with the same accuracy specification.


Vectorized double precision functions for determining maximum of two values

Synopsis

#include <sleef.h>

__m128d Sleef_fmaxd2(__m128d a, __m128d b);
__m128d Sleef_fmaxd2_sse2(__m128d a, __m128d b);
__m128d Sleef_fmaxd2_sse4(__m128d a, __m128d b);
__m128d Sleef_fmaxd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_fmaxd4(__m256d a, __m256d b);
__m256d Sleef_fmaxd4_avx(__m256d a, __m256d b);
__m256d Sleef_fmaxd4_fma4(__m256d a, __m256d b);
__m256d Sleef_fmaxd4_avx2(__m256d a, __m256d b);

__m512d Sleef_fmaxd8(__m512d a, __m512d b);
__m512d Sleef_fmaxd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmax with the same accuracy specification.


Vectorized single precision functions for determining maximum of two values

Synopsis

#include <sleef.h>

__m128 Sleef_fmaxf4(__m128 a, __m128 b);
__m128 Sleef_fmaxf4_sse2(__m128 a, __m128 b);
__m128 Sleef_fmaxf4_sse4(__m128 a, __m128 b);
__m128 Sleef_fmaxf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_fmaxf8(__m256 a, __m256 b);
__m256 Sleef_fmaxf8_avx(__m256 a, __m256 b);
__m256 Sleef_fmaxf8_fma4(__m256 a, __m256 b);
__m256 Sleef_fmaxf8_avx2(__m256 a, __m256 b);

__m512 Sleef_fmaxf16(__m512 a, __m512 b);
__m512 Sleef_fmaxf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmaxf with the same accuracy specification.


Vectorized double precision functions for determining minimum of two values

Synopsis

#include <sleef.h>

__m128d Sleef_fmind2(__m128d a, __m128d b);
__m128d Sleef_fmind2_sse2(__m128d a, __m128d b);
__m128d Sleef_fmind2_sse4(__m128d a, __m128d b);
__m128d Sleef_fmind2_avx2128(__m128d a, __m128d b);

__m256d Sleef_fmind4(__m256d a, __m256d b);
__m256d Sleef_fmind4_avx(__m256d a, __m256d b);
__m256d Sleef_fmind4_fma4(__m256d a, __m256d b);
__m256d Sleef_fmind4_avx2(__m256d a, __m256d b);

__m512d Sleef_fmind8(__m512d a, __m512d b);
__m512d Sleef_fmind8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fmin with the same accuracy specification.


Vectorized single precision functions for determining minimum of two values

Synopsis

#include <sleef.h>

__m128 Sleef_fminf4(__m128 a, __m128 b);
__m128 Sleef_fminf4_sse2(__m128 a, __m128 b);
__m128 Sleef_fminf4_sse4(__m128 a, __m128 b);
__m128 Sleef_fminf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_fminf8(__m256 a, __m256 b);
__m256 Sleef_fminf8_avx(__m256 a, __m256 b);
__m256 Sleef_fminf8_fma4(__m256 a, __m256 b);
__m256 Sleef_fminf8_avx2(__m256 a, __m256 b);

__m512 Sleef_fminf16(__m512 a, __m512 b);
__m512 Sleef_fminf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fminf with the same accuracy specification.


Vectorized double precision functions to calculate positive difference of two values

Synopsis

#include <sleef.h>

__m128d Sleef_fdimd2(__m128d a, __m128d b);
__m128d Sleef_fdimd2_sse2(__m128d a, __m128d b);
__m128d Sleef_fdimd2_sse4(__m128d a, __m128d b);
__m128d Sleef_fdimd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_fdimd4(__m256d a, __m256d b);
__m256d Sleef_fdimd4_avx(__m256d a, __m256d b);
__m256d Sleef_fdimd4_fma4(__m256d a, __m256d b);
__m256d Sleef_fdimd4_avx2(__m256d a, __m256d b);

__m512d Sleef_fdimd8(__m512d a, __m512d b);
__m512d Sleef_fdimd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fdim with the same accuracy specification.


Vectorized single precision functions to calculate positive difference of two values

Synopsis

#include <sleef.h>

__m128 Sleef_fdimf4(__m128 a, __m128 b);
__m128 Sleef_fdimf4_sse2(__m128 a, __m128 b);
__m128 Sleef_fdimf4_sse4(__m128 a, __m128 b);
__m128 Sleef_fdimf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_fdimf8(__m256 a, __m256 b);
__m256 Sleef_fdimf8_avx(__m256 a, __m256 b);
__m256 Sleef_fdimf8_fma4(__m256 a, __m256 b);
__m256 Sleef_fdimf8_avx2(__m256 a, __m256 b);

__m512 Sleef_fdimf16(__m512 a, __m512 b);
__m512 Sleef_fdimf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_fdimf with the same accuracy specification.


Vectorized double precision functions for obtaining the next representable FP value

Synopsis

#include <sleef.h>

__m128d Sleef_nextafterd2(__m128d a, __m128d b);
__m128d Sleef_nextafterd2_sse2(__m128d a, __m128d b);
__m128d Sleef_nextafterd2_sse4(__m128d a, __m128d b);
__m128d Sleef_nextafterd2_avx2128(__m128d a, __m128d b);

__m256d Sleef_nextafterd4(__m256d a, __m256d b);
__m256d Sleef_nextafterd4_avx(__m256d a, __m256d b);
__m256d Sleef_nextafterd4_fma4(__m256d a, __m256d b);
__m256d Sleef_nextafterd4_avx2(__m256d a, __m256d b);

__m512d Sleef_nextafterd8(__m512d a, __m512d b);
__m512d Sleef_nextafterd8_avx512f(__m512d a, __m512d b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_nextafter with the same accuracy specification.


Vectorized single precision functions for obtaining the next representable FP value

Synopsis

#include <sleef.h>

__m128 Sleef_nextafterf4(__m128 a, __m128 b);
__m128 Sleef_nextafterf4_sse2(__m128 a, __m128 b);
__m128 Sleef_nextafterf4_sse4(__m128 a, __m128 b);
__m128 Sleef_nextafterf4_avx2128(__m128 a, __m128 b);

__m256 Sleef_nextafterf8(__m256 a, __m256 b);
__m256 Sleef_nextafterf8_avx(__m256 a, __m256 b);
__m256 Sleef_nextafterf8_fma4(__m256 a, __m256 b);
__m256 Sleef_nextafterf8_avx2(__m256 a, __m256 b);

__m512 Sleef_nextafterf16(__m512 a, __m512 b);
__m512 Sleef_nextafterf16_avx512f(__m512 a, __m512 b);

Link with -lsleef.

Description

These are the vectorized functions of Sleef_nextafterf with the same accuracy specification.

sleef-3.3.1/doc/ladle.svg000066400000000000000000000574451333715643700152160ustar00rootroot00000000000000 sleef-3.3.1/doc/sleeflogo.pptx000066400000000000000000002640711333715643700163030ustar00rootroot00000000000000PK!kÀ†î2[Content_Types].xml ¢( Ì—ÛnÛ0 †ïì ݶ’nëº!N/v¸Ú¡@»Ðl:Ѧ$&KÞ~”“fnà.Kc#¾1 Käÿ‘’iqr½Ò*Y‚Òšœ³KÀ¶”f–³ïwŸÒ+–¦ÊÈÙ»ž>6¹[; Y›³9¢{Çy(æ EȬC3•õZ ýŒ;Qü3à£Ñ%/¬A0˜bôÁ¦“P‰…ÂäãŠ^oHœ™±äýf]”Ê™ÔÑ>¾ç­ «V‹UgÚm~:h—©'Úm<¨°'$œS²H9äKSîÅŸncÏȲ^æÒ…” G¨âÌÃØ›[»o´i^–Ü_…¦ qç;r[ eÿöÔ‚j«JPÚb¡ÉIÖt¦Õƒa¦…4÷A<~é€ñÆ`Ü5YÃ÷1miúá8†à¢—LCðòì¯ÎBÏö·.t­¾s|h–~÷B°s|ˆ©^¯Ÿ§ µ›ƒŠâ‡‚[\+è<ïø×õ!Šº\|k»Àm%Ø NOÂ^Ål=•©Ÿ ±‰÷©LýԌӘú©"§1½îº¶tpž.Èôf€LWdz;@¦ñhˆPçªätu®éÔãx8>1÷­B´NÝNÀ£„]³ÐvÏÞ)R“p¼àÞ…bVBÙ¢ÍëŽoúÿÿPK!høt¡â _rels/.rels ¢( ¬’ÛJ1†ïß!Ì}7Û*"ÒloDèÈúc2»ÝH¦Ò¾½¡àaa-‚½œÓ?_òÏz³w£x§”mð –U ‚¼Æú^Ásû°¸‘½Á1xRp  ›æòbýD#rʃYŸ ÌñNʬr˜«É—J’C.aêeDý†=ÉU]ßÈôSš‰¦Øik®@´‡X6ÿG[:b4È(uH´ˆ©%¶å-¢ÅÔ+0A?–t>vT…ä<Ðê¼@<ìÜ‹G;Π|Õª×Hýo@Ë¿…®³šîƒÞ9ò'홼!sÚ4Œñ“HN.³ùÿÿPK!“¿EšÙ¾ ppt/slides/_rels/slide1.xml.rels¬ÍjÃ0„ï…¾ƒØ{µv¥”ȹ”B §’>Àb­më­ê·¯J/6zéqfÙo†Ù¾ü¬nœÅÅ` Õ (}´.Œ>ÏïO/ ¤P°4ÇÀ8tûž©Ô'™\U)A L¥¤WDé'ö$:&õ2Äì©T™GLÔ_hdÜ5Í3æ5º S­|´;Pç%Õä¿Ùq\Ïo±¿zåN:_³+òÈÅ€ÖèÙ:úõ[Í~¼_£ýÏ2;Ë'Zâµlʬ|Á•hu]ñ§nVï¾ÿÿPK!c\#´Á7 ppt/slides/_rels/slide2.xml.rels„ÁjÃ0Dï…üƒØ{$;‡RŠe_B Sq>`‘Ö¶ˆ- ­꿯Ž6zœæÍNÓý.³xQb¼†ZV È›`5ÜûËñ gôçàIÃJ ]{øh~hÆ\B<¹È¢P`‘Ö¶ˆ- ­꿯Ž6zœæÍNÓý.³xQb¼†ZV È›`5ÜûËñ gôçàIÃJ ]{øh~hÆ\B<¹È¢P&ó„Åâ ÿÿPK! 
'ÿÔppt/presentation.xmlì—ßoÚ0Çß'íˆü:ÑŸ„¨¡‚uL›: ­Ýà&Ü:vd›:íßÙ `@“*í5O89ß×çgçîúf[3ï…HE/Pp5Dᥨ(_è×Ã|!OiÌ+Ì'Ú…n&?\7y#‰"\c ®Èp•ã­µnrßWåšÔX]‰†p°-…¬±†G¹ò+‰_A¾f~8¦~)G¿|¿X.iInE¹©aùVDfãPkÚ¨½Zó5w'!M`‹ŠU?°ÒD~«î”>{ãѪ@aâ,Jcà$sóæÈŸ\ûÿpw¥¾U­H2v¼Cãmæ‘cŽ.ÍîÚñ¥9u¼“chn ÷o^¹-Ð8ˆãár Ü(Í’Ì>è]ÿ¼*%!<Þvës¡‰êÜ3Û^Ãn¢"K¼aúlõ½Þ12¹Æ9¼[,d7ú¹Ã&ÙžðàûÂFçNa/,h`Nå] 2ÌV¨ y ó€ïßö+Â&5³S¾ã3ù éŒ@[SÞ=‚÷–‚Ü[lx©Ý‰âySÓZØ]Ö×°7‡ËùÕìöxÜ]^\·Ë»ùáGÛÝrƒ¿Ýl÷wó#¾îŸ_\ïç_cÔ»õEÇåÅÝ|µ™Yü½{s³Z,º]¼¸[nŽ<È~¹žñä‡ÛÕîÐŽ¶ÓŒ¶Û/Æ`;ô!f¶xº¾¦ÏÃî‹ýrI?m^þl¿{º{²7þåË'ûhuõšE›ù–evaÿ`ÁÌ× ÀðÃEýy;ÒüòÕÍþîÃæ—˜[ôêj†ÅMÿÒürùê-ø—‹Óo·¿ºvqûÉ=Ð-O’*W°Dâ eY ÙM‰3X`Ŭ$R^ÆMž¥ŠYI‘ÏI÷)(98e“7‚Ñ)òšùHø¼ª‹Zõ–¤¸cáj…ÍH$NÞU¬Qådí¤<‘¥ ®ƒT$Mšª&%å=ƒªHI¤ [@ÞÃ\.¾Ì“Rµ€©(³˜Ø(LJJ| áѱ‰D*H!iT_*Õ0 R.Á—å`°¡%,[pZ©ù$KÈ5PÐ’XÖ]QÐ’2Ÿ@½¨ä7•XE'“ ã2Jã9Ô‡ñYÒc4|ôð=VCþmzÙ$„Þc7$¾¡÷.ÆqIåð}õÓ!é4òFò½ãû ô×!¡4½ÇuÈö Bïq8ƒÐ{\—㺴Çuø>ˆzëo„Þã:¤P¡÷¸.Æu”ØpØf×QÖA¢ãû‡§D‚ƒ>Œë(7à ã: ÷ôa\G¼ƒ>Œë((wЇq]Öã:|´ò=®Ë†qÅÃÎÃã:Šqôa\g{:'߇̂QIß¡÷¸Qç ô×!„Þã:ćƒÐ{\‡˜ozëý Bïqb³Aè=®C”6½Çuù0®£0H² ¾¡N‘ƒîpûò6„Ù/Çh}5[Ï"tÆgšŸö³èÙÕ쑜_îæGŠ|Ú£¯ÑÿÒ6ÃÜžzaèïwÛ—Ë/¶òHÄÌ³ØøÊNã¹Þ8Üœb0lωÅháÚÏSà†çÎ/¼maxä*N]<í¸í'oÛ= ¼íâðŽ :5m0<÷bøá¹õ‚á¹£ÂÏ  Ï}~xnƒ°ð¦»ÁÏÍ Þô(xá¹#Á¹ÏÀ Î]Üô xÁ9#Éà\ï÷‚›ê¾…6¹!/4K˜­»{!M•ÇåÚ¹:ÉsäÊ ¸­{Á¹ÚÍà\Ãö‚sÅšÁ¹íçª3ƒs-Ù n+Ç Ïõ`?Û^óïê"Ž[6êÀ g¤õ†ÜǤA£Âùc‡ÇC_µN«€Âc®‘ü`Ì×ϱ£Zn®ŸÌ÷óÏñ—ß¾¸[Ým¿Z™½kS ùjþèO0ôúø©ù¾Ü<úõSû€ï®}"ó©ùÿá-!yGÃý[L@Š ¿|yþ´¿ õÜPŠ6“†óɧhûÉÑÃ[꣪Ûx¸Ý7!Ehðª'dyØ{úøæªl‹§¸ü7ˆ»B=úyKSì 8] 8@IŠŽŽÑËÔz #qàep'[€²]lh5ÒqpP†4Ýy:xÇ&†¶RБ8UEÛCÂïÌÝÑI°ØˆNÃ’H5ÚdP—22)'BE™Pçtpå$Rgº<ƒ„ùžÕIÄÔ `‰Dñ³fé §'B,4aB©jJÞ’àDr'BÈä‰BN%R“4\Ãršö4ð&)µÖÎÅŠÓ„;QB´¤”ÃIÐ¥ å`± ³„Ó럠#<7MP¡yIý€Ù ‡zQBó’Ò®•\*Cv¯¬Ä ¡!JRÞ!PMN½q¡II$â%³W"DI |GKÀNÇ¿ÉP£pˆ’”xNc(æ$‘r<œjNRäò„êèé) <›î*†wÁÃë$FŸÌêE„He2«Øœ0™Õ‹h2«íŽÞɬ 59™U»Ïe°ª¦>È« Í*v£íp×L†‰¸ðÁñ];ÞŽîx—S´:™U:a2«æxˆÉ¬NfÕòå?T´Ú ¤È¹Ñj–`«ÂØI 9™U“Šž’ÀT±øÎI;_Ê Ï­Nfµ5&“YmWbŠVÏV›8¥sB‘›NèTplä$'³ý]%›´(4éÁbiÞ¤™¢D(Ë SiùÅ©´Jæ{*­Rr`*­ ò¾–V'«Ê„þàrÀ“URÀS ¸ëc˜bÕ)V}w)à÷ЪiQ*ÚJ3œ+°^R8È?»²Ê½Æ¦ û»ùÌ3*«'«ê¥ ‹±gƪ^:2¾=³ ØKçf€½tdrçÜXÕKH&„Îmö’e–sÛ€½„¤ÈŒk6Û œ—Óœ¢õœN ´VÈŽ^œ Ûµ{ I¤³cU/¥wÜì§%uƒ]¶ð>Øì§%õh×ì'%UÄÈ~¥Îªú)I%QÇçe€ý”¤–(GVOVÕOIª‰3¬ªŸˆT½ 0v†MGµµ»gu‡‡ 
å!íf&28K"·[+uÈ`‰<ì`0Dv¬ïïêNsiw³êú\Rv¤©h‰ï¼iOG›.ZpÐÛ=Jt(R}ØA2ÓQm§cÝ£ÚÎ>L¥ÛVˆÃTZ'„ºNG¤ð>lfŸ–iNuwk÷Þ¼jöÚ-µ@í§ÝNO箲`ßÛ Í;îÌØÜCá…æÝ7šý`/´ÝýfÀÙ›õƒs³½gŸÔÎå;:m0óƒ·=!0•ñýàìòØÍ^~pÞÚeÀÞ°å‡çíY^î:m_fûÉ/ÕÖZxò˜¼ã‹Å^ºÄæm»I–=/tf*ÕæQØûðB£ï ¼f íÊC»ÍMë­âb‡:mD7›ÜSœcvko¶´‘žuý´y½šÀû¼ynŽoóºñ™ÞÑæuœðÝ”ìVQ3îÝ á›×³”®âáÍë¸Â¬îNÍ|'{×éÜs›ƒ½†‰ûWYZ&5åÞ8³ýг‰Ö/É ‘ªíš‘qMhBsZu€ŽD¢£° 4DGF™Øá {A·*èH¤´®yã²ÙdŒiGÓ‘H)~=2„/KÞZn†z é áÌÓï럌/kì”7Gç–Mâ༳ƒØOE˜yR©˜MâPPê§ “Oö4ÿàë—88Îìø÷S‘™'³Öÿ>$xR8/8'ãÄ‹œ‡ƒƒlí:­—sÁÜiS²>’9™‰R|þ%sTìd¶ßI €óøè‡ !)ÍERÓå|ᥓH<‘0!)θ….† ’HtŽ”iO ,”gàf‡[`餧vƒãïè.ŒðŒ¤°èƒïH µÖ¸9E"¥u“•-‰£$# âdD/¢ÉˆF“ )izF‚1¢8}ÊÿP›RúSV‚žóÐDs¨µÕRehmµ´†j[-‘´¶ZZCµ­–HZ[-­¡ÚVK$­­–ÖPm«%’ÖVKk¨¶Õ’ÒVKª¶ÕIk«¥{¬µ¡tF×Z¥4¢Rò*\8^°ß–¦¸NþèÕ9±MIF .jÇ-ÛÆÇñ’‘8cÞgÙUìïzéHY%=58à­p]$k9/)ª£lµ=¼nRRG¼H 7fëò+6‚ŠÃ¢¼t¥‚Û¤˜ŽxÓFÅlRFÃ>»4”º‘ ¿‰3<àEX­Y¬síµF,¥Êà6ÌaÆW¿Æ¦§ÍÏ^Že¬‘¸ÑÈ¥$1Ö8A³©8:ô Œ³Í}”±Ft¨ÒœRŒ2ÖuZg •ænrWÖR K´*jt´$cŒ5ò)uã]>™.èŠJÑÖÑÉXK»›LÆü˜ìôd¬Câ)¥s2֯Ɨ½ff§yÀ+Z#§>WÅ„¤ç=k³õnLd=kÞµ¨«ñJ7YëHœ)²NÒ)²Ž~À‘5®+HlúØWIŸeˆ(‚ñ¨&+ãw‰£‹ßÒ°2€wq”ek™)ÓDð^ÁK»«àei\ÚêDëH$­S cdµS ‘´¼Ì“Ú^"i#xXçÚ^"i™,S;IÁKÉ.µ¼DÒ¦Û¥pk5.˜þ¤Û« É3 y*ô?¡wûËF¥Û«)à)ß##x´‹qºÕ7)Þ#D t§ˆÙ¤t'‡„óº µËJñz¤pcF¤Û!@æ¤%ÿ|¤l×cÒí`ÁU“‚H·' ÎÓ ½)Ô(ž¤™"ß*qÂÓÒ¬MHœ¢i2 Íõﺤ¾‹ƒm5%êõNö|¸OP' ±tçã|‚²Œ¹Ôï'o>Ò'hê≮ë¤ÑGúy¦PœNÞ|¤O@+f©Fúeª!$µÀHŸî~X8yó‘>A™¤ BR¸Gú*c-ƒ~-‰“å®y+Ô¸a©6cî9/ö|Ÿ ÀUË´l~2çûûP Ñ9Û'Èâ «¤#UÂ8Ÿ Ï ±ƒtÎ÷ JôÐSï±ÿýH…0Æ'Èi#FŠÔyÜ á#ìHIÒÕþyHã>Æ!ÀEÍIxRŒq ÀbƃòÏÅ5ðcœ:»Â‚øv Š2VÈåùNé3r¤sqÀ˜DA‘`g˜ÞSPÂ] kÎó^±ðÒI0Ê)¨’·n‡ I‘ç`ÇaJvÌ µÁ(§ óØDTBR!hµ›(ÐYkiàµt$ÎäŒJLNA49!m#uÀäLNÁ¸LÁäà´ëÉ)à–‚É)Ó¬?e ÆU†g h|j7¦zkñ2´ÀNŠÂd]ý>µ ´Î‡ÄÑe$d„ ÍHHœpFBºÚŒ„ÄÑe$Ü\²wÁARæ$d®@Û¼àà(s2a¨ÉIHxp¥Êý¹m¡ÂÁQæ$œd¡6'!‘´9 ™-T»©RºRžÕ… ‰¤ÍI8­m^HÚœ„jµû!‘ eNBʶ:'!‘”9‰TÆýÚ\ƒ¤LŒ¡#q”d¤µWp*T¼o…ŠÉýøA»9º]$§çÇ(å¹"é.݉ÉýP–D¤'1¹&„ŸÜº×æ{t?*{ÈŸøÀ¡lç»túÇä%#]–l¤ûaóÒ‘Ê*—ýHÊv—øÀ²9±Ï¸> Œ^7ú¤#û$pr\ðýH}…Ú'aÜDÁ2ð•ý@ó:ŸgæeöŒÊ~ í.¼h2æì~à° 0ìŒË~+LæÜì,œf½Þ™ûáL§»"Mcôi†‹ãŽ4× ¦ ŒÓr™Ë~$|sQ@sJ02û[3’:`dö‹+I%02ûA‡¡†ß‘TcÝÕqZ.Gf?t‚*ÂÈì‡Ê-xÙÉýº‡:2&÷cr?p¨÷ä~ø{µe@ñ>¹ik6¦Mª¬n°­(ìæHO»D&÷ãÂvM†—NzhÜ‹hr?"_¦ìÇ”ý˜²SöCa²¥ËrNñ寙ý˜Ü“¤×û1e?.¢)ûÁEž3²Ý=MŠMªg¸^2²Žb®Ð¤Á$Ò©øâ¥#Ø3³^:Nèà 
װƖH§â‹—ŽÌ¡šÖÓñÅ/™B=ÓýðÒ‘qOË%G_¼Td!­§ã²^ 2oŠÖÓñÅ/§Ž‚/ºœ®Ìv­§~:Ò•8/ûá§#%Z‘‘z øâ§!UÀyî‡ŸŽ”æ3³~BRàåœS|ñ’J'dh%×D$_ HÙ?!©ÎÌ~ø IE@­§«ó@ñÅOHê´žêU"²^BN½*·ÀA:õ„z 9JDIGâ(ÉHÅs^öÃ;©w&÷åä~`Ûíä~ô´èÉýh{?¼šÆéãxïÜ øt·¢øòFÒg©òº4wÀ¸Mz“ûaz\OÙ?ÓIOâÎý¸ ¼Ä§‡#Ý4=¿Å¦Grñjc‡Ÿ¢ùæùÕŒoBßmÑ««Ùb»ÙVÇåo ¤^Ÿ¾â®, cH`Ñ€døÙœï©F†W!‘ÓA”¦Häl2b‰œB†lJdÓ“ªž3„T"—ƒ(Ãï—Èæä45eøò¹D"‘›AȆKl|Äc}&Æe;Ô‡ñ$}§ÑaLú0^£#–ôaÜF·9èÃøîrЇqE‚ú0ž£Sˆôa\GQ›DÇwÁu,6VCî—‹c´¾š­gÑñjvœEû«Ù~=»š=#œùån~$ÅÚþ}}5kì£Û«™5Ìôç»íËå[x$5Ë1¦yéìCœàÖ ÏAÛ¾| ßBµŸ;3ºÅÜ´×ûÁ¹ŸÁMØn;ai9àñƒÓÁÀ) +÷¬÷À/ž­-'狤,nÕãW•”ÀˆVœç—âÚŠe÷ äe~í_ËÔ 7¯[‚.!wÉ8l1K`:ÑÏXÖe‘³$eq®iñ hcmbVigö0K~zNRï9Kà>çA 8ò]›¶@í§]“Í4À&Gé6)IØl2óó»àVm/ ñÎͨÜðäæ Ÿæ¦é´ûÊxޏUoßà”Y“¥Îâ¦X"œJ¼Y–UR™nù†ýlóGöž-5—Š»¢ì2œ“£ýàúsÚ)˜³åzóqÇæÜ™f—Ôm`šÓZ^è‡4Lû‹õö°dV"5fxªÓg¤_Ž?[nïHg¶ëÕõãÕzm¾,n—wË×ûèåÊñø*™Ñ¯×/î>Û^óï Üo¿þÕÍMï×þÐŽb;ÖR¤ n£4o¶D¹ez€ï.»'û?Àçñõziˆo>_ÞD«khÖÔ uÃ3åùb±ÜùA·óëeèÖ4 |ÚÝØv€öÑݱù -<¡.on`@:dv³x0Fî0 åíæ„|·Úl÷÷ÍlYYÊ Ï Ä ³»<¾úh{ýš†{†Ï'ûh\¼Åkƒ8Ï7‹Û-LÚ⸧!ð ǧ„h¾ìè7Xçh¾~¾@ËÍõ“ù~þ9þòÛw«»íW+X½Y´6±ÄWóG¿x‚¡×ÇOÍ÷åæÑ¯ŸÚĈxwí™×h^"~¿»Ü¼|j_êâ—/1öêösmæwË«ÙÛ7xûͽýæÞ¾ù÷èí›ÿxûæÍÛoþß##û»KàzDƒƒ̒ИOö´6rtfùå«›½aí-˜“¸¦ ÃÍžE’ã»Øî)[¾:F à.ï¼Ï¢Y½ê¾j·g‰‰èx xùæÍ_ÂG`ЄÖöÄ×'†n§{]_ï绫Ùf»YÂûè^¡v?yq„hØ‘ùõÞÿ!+óËýý/Œ_Ðé…ÙxøV1­!ÅÑõjÄœñÓáîøñz970/¯çÇÕ&:¾Þ-oæ ¼&¨‹ýóÕ|íVÇÅíãùÝj Dr÷·óýa‰Ei ž†êøáÓO?ùä1Ù3xQø¿ùí‹î{b³ãòôæÙó/Ø)Ä¿Æõ}ü sVÅìWãgŸmœbè ˾UÖàfc8$ľp®4Á¼çì›}ÜþõxØ's÷+IÃÁßÇC¿ýýïñ߀çõèˆïãyŸþü³ŸFŸ®žíçû×ÑÍvï<9t[*£ãÆ)ºïcŸÀYzAªèyôÉþÓæH³yüb³8®¶›ƒ3'ˆí#{ÿü exXþïþpñÿù->"£™:•»6ÔÑ~ Û‘æ nV`§Ðšm˜ê8‡&#w?ÃRœOMZí¤÷PlO)Óc¬vÖàb(¿Ö[®×«Ýa9£1°ÛޝÚsçž=ÿ®3Ü7l©vàÆ 8#­M^õäP„g|—ÉÙíb÷ÙÙ“~GT¬k½ÛsE¥Æéwœ†Éj\WÓFh­‹â >%ã!”¸¡MË=àß¾Qa§x%’ʹùÁÇ|bµNó·oßüù훿¼}óíÛoþd~øcÄE‚1æ#Ë*\—ÃéÎ|T~Ë ˜ w‘õƒ>ˆDEÉr’‰¢.Ьl“Í­PÈ4Ik0Dáíùµ¬«ÝÜ­çö¸\d9ͦÿÂR2Æë;H¨¾îs¢th†ã"n#ñD¡ddÉ*äÑx „Åè/“Ao˜tW‡”éÞ0%‰Äº‡N¹ Ì LG Ú*‡ Sr‘b\â¢X=hÂŽRÒ õWÑ™ Õ“H9½RœœòØ'JhhRÅ{’Hy‚fNàä¥ é sÇ­JÔw¦"Å WÓA1¹u®áÉ\è©Qådí:Ö£“UK'‘ФA&U3))ïÄPEJ"`Øò^?)ð%Þ¡Y@‰T”t¥ŠÂn8—äÔ0ë ¤4ª/•j)— _8Xlh Ë\AŠN|#3¯ %±¬ÒSÐ’®Aõ¢’ßTb5j¥ÎH¥Ô'uŒÔ¾B«;X\D8—ŠyI÷>äDã'I,¸ ¸X󾤯 $Iœ§Š÷%±pæ=.3P(öTŠ§Æ„ùPb@Sé[T{ª×PbxB¸fŠ÷%@Òà¤2̤ÞH›2Í5òu–B¾2©Èúç½á`¥ '7“Z­!ûäy+ÅB%*øPj¤q V˜–Ä*ó 
Ž¤‚”T­3&%±p™c¬Ñ†™”ÿ")Ц­°(V†ÆnM<—Iñ‡Ê€¥T©VÊ~xPgäRgXÏ3Ìâ WäRc)šx4”ƒ¥ðÛs)õ(þS‹šb>–J r©(PƒÅýÝ Åî`!o«²¹Ô`»Rå£9X¦Bas©&à\à€…Gí`]kV§»Î2æ6. Š®‹•Õ*ØKy‡BÇñš¦X%92 [•K-‘'úö¶ÊÁBŸREaÑ•Zy¬öCÁê+VM«j"Ãi)œ„àËr°PÓ«( œW!E–+S©Ö½¹T””§ms“n¿2ämŽû˶µD‡ U"‘Û ­ÊA"·Ý±:d®DÖÆ–ÈmµLGÂ%‘Ûª‚™2}›k"`[%:äÆAo;•è°ú´qB½ò=†K†qÜßóƉà–0[zît¾sùWÇu”Ý‘\‡ïƒÐá…9èÃô[ÚSpø>ˆzOÅ¡³yzOÉ!ß2½ÇuÜ0¯ViOÑáû ê=U‡DÇtÊ:ȇïƒÐ{ºŽûƒÔs§Ü€C}×Q¸ï ã:Šàôa\GA¹ƒ>Œë²×áû •ïqïѯ|ë¢Þã:ļCÐ)n•K‡ïƒÐ{\‡àtzë«BïqbÉAè=®C|8½Çuˆù¡÷¸qÜ ô×!¢„Þã:Ä[ƒÐ{\—ã: ƒ$×áûêÙ8èױ䱳±-7ÒÎÆ¶~M=K§-‹¼=ËFbæYl|e§q‚l7!Y nN1¶çÄb´pí§…ç†7]k-‡·pí§Ý0Æ" \Å©‹§…k?Þ¶{xÛÅá}dЩiƒá¹ÃÏ­ Ï~xn `xî‹ðÃs„…7Ý ~xnf°ð¦GÁ Ï Î}^pî*°à¦WÀ ÎIçz¿ÜT÷-´©Ù{¡YÂlÝÝ iªì<.×νÐ\)7à¶þíçj7ƒs Û Îkç:´œ«Î εd/¸­3<׃ýð\ýµð²)®£öÓŠWp-¼©ËúÇç*¬}~S[õÃsá¹>ê…·ÕP†çgÞT4žë”^x[Ùex®5àMeÑÀÛz¡ÞVžk~~x®ð1<×íüð\¥3ð¶öæ‡çJÃsýÌÏÕ2Ï•3¿ú¶/†ç:–w|[µbx®EyámåÉÀs=ÉnÕÙZ® yÁmÈŒÎu?8WqøáMm&n;Tñ0¦¾â‡æjмõ-ZYm?YfmEÄ@RáÐ;¬­j`®UøÁ¹2ÁS¤}—­Ö>BûÉb« œkþѹB`Às“÷÷ƒs–߀sîÞΙzÎùw/¸Í¶p›C÷ÃsÆœá9î‡ç¬7Ãs_p~È6ïnó í=1‡Yà†p»m9°œýTÑÉwsÚ¿rÚåÎ[Åßçý+:}}û&„Ö¶OÁ…mԯ뺲G> Ñ‚šÙ‰ÍOÛ¼àsÁRÙ½+Mœàð+ï¤O?AcMbŠøLÇÐöwéS :9=I¨?ÛÔ²0x]%%k:æ9H¹„Çž½SÐ9Ö´¢6ŽÀ<ðæ; 8Ï£ ÏCbT0/8û.HY‰Ž ©xÓèà‹ÄHbtaÄT, LÙ‹Ž F¯ÑØŒò¥—Žƒ’çXêÏ ÐAš££ƒMЏE+HG¢ Ÿ»Y/QiG‡§¤#Qì,Üù@·OÒÖgÕ•/ú95«tȽŒÚ°„Z/Ÿ6,Ö˦µŽšyìóJ,U”Oj…å¾|+_ãâ´>ëC‰$£EÙ2Ú±ç ¹~¥M%ç4:ÍŒÊºÊ lT?«/4kgÍágEÁ଼à­UDlÐ.0ûi¼xÁ“¾ãvÇò_OœæÃ~ 4y„?„ã{Ì)>_ì—8…iG‡˜ü²ù)z±_]Íþõ£í\ôè£$ü(ÿiS=úÉã²xôf?ÿø£ú'gŸü¶Ûï’ür±_â„„íæç×Ñ«»õæp‰_^ÍnÇÝåÅ…Ù:??üènµØoÛ›ãÛ» ø‡«Åòb·ýz¹ßmW›ã%/îæ«ÍŒy ´©òÂV̳™}õíÓb ‹§ëkšÀb½ÿl¾ûÕKsxÇÝüp\î±÷¿ÚáÜbh„æ¼ÿÿÿPK!1—Êâ½­ ppt/slides/slide1.xmlÜWÝnÛ6¾°w t¯˜’eI6â¶l’6¨Û %Úf+‰I;ñ†Ý$À°‡Ø+ »ÞóøEvH‰vdX–b½˜/BŠ?‡çœï;?9u[•hK¥b¼zÁö­s^°z5ô>~˜ù©‡”&uAJ^Ó¡·£Ê{uñýwçb ÊÁíZ ÈÐ[k-ŽÊ×´"êŒ ZÃÞ’ËŠhø”«N!É H­ÊNˆqÜ©«½ö¾|Î}¾\²œNx¾©h­!’–DƒæjÍ„rÒÄs¤ Iˆ±·OTºËòyY˜Q‰’R3«·¯¥˜‹ki·ßn¯%bøËC5©À-^§ÝhÙÏŽÁ¤óàúÊI"ƒÛ¥¬.ÎÉlC·Cœ¿3áÐ[òf1?®æëwOœÍ×Ó'NwÜ ÁáQ1,oLºfù#ƒp;›`[o$EáÁ¸ö1".yþY¡šgkR¯èH šëÆ!nIJ~³¦¤Pf¹ñCžs~éÃâ¢dbÆÊÒ8ÀÌ‘ÐjAÁÕòMz­_.•vÚH6ô~ ÓÆýpìg=œùN¦þ¨%~‚§I„£4È‚ìgs;ˆEAwRNsô ¢G”©X.¹âK}–óªÓpÏQ¸à–À[R¶ˆ·2€ÛŽVE˜#Œ®JKªóµ™.Á¾÷à-ã’/6¤£õ†{À±ÅÍ/€^d£¹5ÞIžq&8ˆìrËŸ^?ꦉ%Q:°{B¥$Á¸E2„Š¢(í§ 2Ž(€¬Tú5å2ð:èi_ [°¨ÑØ1¶ÔÜ åy„K÷§é4ü(Œ§€ËdâfYädz 
éMº“,›—5+ Zq_‹õ8/YḤäj‘•Y¸fög) þ?ëzÕpPº±´AÖ¤`âÓäÎ&=ìïÙßý¾¿ûsÿ+Úßÿ¶¿¿ßßýß(9—¹Žôí˜CÌ[TŒÌcqÒ /ŒyRG˜t»i¿×`ßK’(NO±ï…a Œh°q¸ÛºÀ‰rÀþ[ìÁVb`ô/vF¿Œ`Ð$k¨’ºÌ¸ ‹«g¬eUsÞl”JÏõ®¤–U6x¬Á¤}ÞT¬âŸ äÁÏУµÿqî!RêKûý‰ø?\CÍú΄)†”Z0©mEªÒYI :lImŠGôNÐ%É!Ì€ørň‡ƒX‘Š•p1ó5‘ŠB@Øä a Ú]õÅür:™@nB߮Һ¸&’¼ZãVÃƭOilÝñr%zXƒ/*‘EÍ0÷ïùÛÿgþZ–AÅ|kéÿrþ Nz<ïöã°ÛÀß8ŒÒ4„äfrW€“$\š„ôåo¥ÿš¾èø›¿¹š K¶Dîtn'¤‚ÛPüšxüöL!ÃoLĬ䤦S4ÖÌ6unûÄ› P¿4P˜ò âÔ†«kG¡Ñ°ÕHØ.±éÆc›,ûã šùѤŸ@­{þ¬%?§£¬;5µV@‘Ë%µï×ÁÃâóZ Áo¨œÙü´ ꆽ qœØ®ômÚ!;ºÚé:ë¼”WD¼ÛÚt ÿ0h*¡<Ã’äLTCJ91¶CGþÿÿPK!o@Ç—“ppt/slides/slide4.xmlìXÛnÛ6¾°w t¯Z’u°Œ:E,ÛE‡¤ 껦%Úb+‰Iç°a7 0ì!ö î÷<~‘ý¤D«I“.Kzás€H¢Hêÿ¿OÏ_\Ö:'BRÖLÿ™ç Ò䬠Ífâ¼·pG’ 7®XC&Α΋£ï¿{ÎDz*´näOœR)> d^’ËgŒ“Þ­™¨±‚G±_@¯u5Ï!¡¶ª­¢Si˜& Êûœ[á¶´¤^:ÍG¡ñh™ÍÜãEºñÂO¢Ùp–e3ßÒRÒ¢ îîé¬ÀYE +%)6«¬Ȱµ0?£H€¿¯6ÐêèðLÚkËhK ”í}$_íð/G ¢v׿î>ý±ûô×îú7´»þ}w}½ûô'<£áÞ[º9R—S–7vÑ}öã‡í]ËB§g 븠ŠÒvüP<èÀÒbF‘`ÒÐ4÷¾ï%iwØ®,±à~½¡t®d[EIJ,.Ъڊ·¼D¡Ÿ.¨ÖT<ŒÚõý‘§ÂÕ¦U9ZÜ?RU.KÌAøžQŸŽlOߪÂùÇV”/qË)ôÝô…Ú†·}0-‹}œÀ_ %kЋ+ ê ®ÀÂ…À0‡40ßA8ªÊ˜±¸?'.hg…¶¾~QIµTW1˜Ã–ôöq[Óš}€a œWÁ`9qHã¾_ê¤Õ‰yþ€ÝÎ`žý Љ;pðÀÈ2'Ài¡Ðó]ƒÔ'kœ@K¶9AKÜHt&‚y@àº)•ƒ8…gkZi¿ƒó I€ ħ£WGË“ù|¡‡£v3¥¤)ΰÀoïΡ‹yŸC—Ó9|¾iØ}d†VÀè´4fµ%ï7fôÏÆ4ö•ÀkãëÇÓC?Œ3(£dèùÆví(©§÷a’†q¶ÆŒÃx8l'ËÞâõå!J¼ âÂÎ`u¶Wè%S%ÍA̸aR7õ"øßÿ²ÿ…ÔÑg¿[Š„gWÆÖÖòé¹@¯Ngèé‰ )'OÏåH¡+Á‰°™»‘Ìf¦»9Ý}u†;bæ72Áã½ó¿þp+Ø­žK7÷Úä«üOÀS§Šèãí‡GIè`¸½ÚûÏ8`ÛäæÈçüýšð®÷=«Õ½¡ïZ£~ëHáý«V³f³‡Npœ`öTÜœµ[êé4ƒl4u§~¸pÃYšÀ–:ŽÜE4 Ãl::Άs½¥æ°—Í1ç[¯ì9>ì ƒ³ "8£æœíæY‡)¬jãÔl·Llfým£…ìùY^‰SÌßœmÁ± l acE0½Yƒª};œ»ý ÿÿPK!‹©‹[÷&‡ppt/slides/slide3.xmlì]ÍŽGr¾ð;úh€šÊú¯F ‘"kH»ÂR ì‹==œ^÷ª‹äІ/"`ø|óÁW0|6à·á‹ø‹ŒÊîÈæ03ª%- £y`OÏÄWQ™‘ñ›Y_þêa½JÞ.úýr»¹™™/ÒY²ØÌ··ËÍë›Ù~xñ¤™%û¡ÛÜv«ífq3{¿ØÏ~õÕ_ÿÕ—»ëýê6z³¿înf÷𻾺ÚÏïënÿÅv·ØàowÛ~Ý øÚ¿¾ºí»w¸êzu•¥iuµî–›Ùˆï5øíÝÝr¾øf;³^l¾H¿Xuî|¿ÜíÝÕvš«íúÅ—±hï–¾ÂÈæ/W·ô¹ßýÐ/ôÓæí¯ûÝËÝ÷½ýóoß~ß'Ë[Ì×,ÙtkLËìjüÃHf¿n@†®Nà¯Ý•ºë‡»~ýÕ—Ý5Æ–<ÜÌ0ùï逺ëÅÃÌù—óãoç÷¿{„v~ÿüê+Çwp`J£â}:œÂ çã‡ùøá?>~øúáÇ·?ü[’FIÐO†è®··Óļ“~;ÜÌò¼6YžÍÄPMQµyÁ.ë"oZÔe^fµ©f ½lÊ2¯,ÅaLÝõüÍ~øõbk'°{ûí~°Óöú?£×·ã³™o7›ýrXüÜîÖ+ëß\%yQ•i[&ïÇiDŸ€þ$A¦ªš¼n“ûDÜÑcœ GNY]·yç$AYjÒʘ8§Lr¦ÉLœ“e˜è¦ÈãœrÁ ˆ¢IÓ8'”ÖxŠqNÃÃì™¶É›ºˆs’ ‚„§lâœJÉ©©«6S<' *ʼ(5c‚$ÇTcp{QÙ“ ¢ÂäaÊ£²WKNeÓÜ^”“PGcª8'˜†ã˜Š¢5)— 
¢.[šˆè˜ZÉ)k‹´Êãc’ ¢ÎL‡åd¤¾×iZ¦Uœ“*Ê&«°²ÄYI…/Œ© …HTVUU*t×xŸæ°ÀŠQIPQ¥m‘gŠQI•/híSpò0U[´­BÐTyÍx$}Q7e£zJRÝ1qÂf‰)Ú²N5K9Y;©Odi¢ŠëJÓf™jPRßs¨¡Š••Øú—r©ðUa*ÕJPYå)‰Qœ•Ôø†C³Æ *iAÒ,}™\&€(iq‰>,U†V°lÑaeRåMnÈ5Pð’¨Ñ]Qð’:o°¼¨ô7“¨²1Ö߉Kj½iÒLµªgUÂE„s©—Ô}øÐŸ$QpA[òDâã’ÚŸ¥p”ŠLñ¼$ª,ÒTå”erÍÈÒÌšƒ¸JT ˜j½Í¤þëçP¢JÜ!\3ÅÊÀ´ø­ŒK¢ÔnL&W€,-óÖ'ÎK¢ ¬À•\®™iêTãÛz¨"ƒ‡¡±'¹\jk͸<LÉTã’+@Vç™ÊoBHx´xyžêÖÃ\®Y[e…F¿A)§\®dý ͺᡲ’‚…“›Ë«ZKö5*‡*ÃbITÈ¡\ X"BÅyITUÔp$¬äàœ¡8+‰*³"U…¨RÿKS&NÈ%*¯šVÏåRý±dÀR*‚T•±]3(Krð ÇlEüYy(Ä#©(äŠQfU­ŠR=”Âo/¤Ö—&oT9 ¥R¨B.E[7°ªŠ‰“(Z‹r€ØU*ÍCe˜ŠLa ¹LÀ¹¨iŽê“‡‚¸6šU½úŽ„Ëâ¬iæO¤.Ÿ&u{7?Mê(ÆõàÓ¤nì 88qø>eìŒJîø> ~"uˆ:'ÁO¤è$ø‰Ô!>œ?‘:Ä|“à'R‡èoüDê›M‚ŸH¢´Ið©+¦I…ARlð} wŠl<¸'uìË!L¿˜Éêf¶š%èŒf šŸúYòêföŠXv×»n ÈÇý˜¼Cÿ‹k†¹?öÂÐß×Û·‹¶–r @hŒÄ콌ñÕ8Œ#åjã!¸9Å"Æž“áèÜçŽ9p‹ ÓsçH~lazä*Ž]<îºî“¯?¶{Xú±‹#x}dЩiƒé¹#LÏ­LÏazn `zî‹ÓsÄHo»ÂôÜÌ0ÒÛ… =w$09÷ɹ«`$·½ArÎH29×ûƒä¶º?RÛÜPš5l¬»)m•¯Ëµó µ) äÊ-ùXÿ’sµ›É¹†$çŠ5“s:HÎUg&çZr|¬3=׃Ãô\ýéeSœS#÷9ªWpGz[— _Ÿ«°ãýÛÚj˜ž+©LÏõÑ ýX ez®qFèmE“é¹N¤«’Lϵƽ­,Zú±^¤«ƒLÏ5¿0=Wø˜žëvaz®ÒYú±ö¦çJÓsý,LÏÕ2KÏ•3ç 8¹qŸãrÏ/¦ç:VðúcՊ鹤+O–žëIaòq9 [Ë5¡ ùX²WçºN˜œ«8|ó¶6!;Tq3¶¾¦æjн¸ó-Ü\»Ožó±"b)©p¼ìXÕ°Ä\«“se‚‡hë r[]°ä\3“s…À’££yÿ09gù-9çîÃ䜩·äœ’ÙvK>æÐÃôœ1gz΃‡é¹˜é¹/ø„~¾ÚîìE’OgSÑçŽ|BÑ¿¼ß®–·/–«9s¶‰~ñlÕ'o;xН^›í^½Y·½åß5eš:1:[Þ•VrM·Á^ÂûãÇ—œÓ*¨p›+$ÏÑ@Î]ÝøÞ¯ÐO¿ÿýâ]ðð;¹³ûäzÝ|Ž{¾õý}w»`6tãß¹½ ]ùóp¸öxÚWpœwmžÝ‘ž ‹»;ø×ð8êø€°œ·›#x½Ül{ž6ÿ+ŒjäÌô±S V϶xð€ºÍü~ >ôt Ìâ~xI@ûùðÿèV¯±ã@´ØÜ~ßõÝïñ—¿{³^®·^Ú½+[ ùs÷äo¿Ç¥W÷öûbóä/ÇÄñìÜÙÇHÍÿŸß€’w4<¾À¤Ø ñÛ·?} @–Áá€9kZt§ØY=n|0d¬„Ä´ÀP¾ð¼»}R…&o Z°ãa§ÿ“^ó?U¶Å]ÄÚþÑÏ[Ùb_„"ÇCqÿPŽðA’‱ë2µDøH ¼ îd‹ðÁÃ9ð©ÐņVã(ƒ2¤í΋ðÁ3>ðiÓ-Xq>S×´=$þ| Ü>“è4ÎH‚´É .dRŽŒÊÊPçttæ$¨Mó]žQFˆ|ŒÓ˜”z"¢ A?k¦Ù™##VÌ8# ªÛŠ·dDd‘Ü‘2…Q詵¦åv„‘×´oÐÀk2jˆÌJ3Ã(1^RËáÇôA)xy(^ã"áõút„¶ *6.¹>`4èá£^”ظ¤¶k5—ʇG Qâ†Ð'©ïP¨¶ Þ¸Ø $ˆdÉq’ ŸÃÑÒ ×ñosÔ(ã$5žÓŠ1IP›SIª¼BŸP=>¢Ã—ðÒ£áÂf]×Ä.̱¤1z1:0ÄQ‚]L¥CX$xZFB ÁÓòù'é|Æênû$™?-—OK´¼ïK (¸Àô‡¨hó¦nš´Ñf$>MÞhÕñàžÄñ ~BÅ‹U·2Óœk#œ€añq1êñ¯~š†ý{{Ãì¶Jí¨Üç˜H£ŽK^ØùR³¯m¯¬\è 5Û]KÍŽpzô{-9»³arö^-9;¥aröAÇ«“k&w!;;ˆaröùf¬›&g§Î’vÕÂôì˜ô2ÞtÓ}òC+GO.SðúâA±Û¤f'Ç^œ]— 5;*–šÝ µ¨™° ¤öÂÍÁ/™ÞÂÞüÏ$‰DnŠRP—ôÖ'éB?7vIoÑy‡;` Bé-kî~¦ôVU©KoaŸâ¸Š‹ôVže8ƒÓ[5v'àçŸ1»E•!»ßkÜ1J×~$g奸ò¬2 
åQŽkñ#?ʰ´ÅN/ŽeÇÒägØH̘²Š±ñ#R¬˜¶Ÿ=2 ¢;B˜ã#ÃQä`Whßu„eMéð´É`ô¸7ÂG‚2ª ÆŒ_aí«Š“OáçãPUB.$6o2mK³›k"ÑTÄlŽ!ÑÇ/1h±9Á0™¢²xøyHrS–蜉ŽÃKMñäFÇáar옳hÂ#ñŽ 8¦-Âãñ@¶vK[#Œ#©ÔZãæå±”Ö Þî!;©å#1J6Ò ^ŒèUr1¢ÉňÆiëŒ$?Lj60TŠÕLj?ŠÚYF›ý#ë³T­­–­­–ÖPm«%Hk«¥5̵¶Z‚´¶ZZCµ­– ­­–&Tm«%Hk«¥5TÛj¤´ÕÒ„ªmµimµtµ6”¶Ø ¯ÒˆJÍ«q$aÉ~ÖŸ‰D%FÉF*ŽrÄ9|vU²‘˜s^ƒn—šýÝ ©«äOxk(ë\TÕ³lµ½z|Þ¤¦žð"ÑÞÚÚð˜_ùŒHEE9yrÀK'Ü(¤MªéoÖª„MêhÜg—Šf°‰C£53=àEX­™¬Ÿj¯5j)Õù7œ—3ÍXãp¨Ô6ó„ÅË‹wÏ2ÖHÜhôÒk÷8ÇX)’¯mã,cèPµrÊ%à,cÝdM®XÒ¼x÷cóXUk´$kk©Óò-õã]î]‰º¢RµµFôb¬¥Ý5cþœìôÅXÇÔSjçÅXÿ_5Ö8*§Ó›q„—-ê}ÆéôŒu&ìL1 éy_Œµ=½ÿb¬éc\"k·äÄéV_Œõ/Yg8I|Lm‚4ð87—+½0Y¿KŒ.~÷JÃÊÞÇ(ËÖ2S¦‰à%½6‚—Þº6‚÷0ÊÒ¸´ÕFëHÖ)ʬv $HÁ˃óms$Ï(äqínÑþ²³Òí5¶“FÙHý>3‚G»§[CÃ‘ê­Øç¤Û)ЮÃð´Iíã©NZlâG*7&àŒt;(ç>¶Ð¼IÝnÎI·ã/ñ§#»8Ã)À«œ¸5+4©Ô(žd¹"ß*1ñaHmÖ¦ $¦lñ:¶¨ˆùö]—Ô÷1ØNÐpkah¾¼ìùtŸï¼ŠÏ˜‹ŸçTUÊ¥þàX<ý?Ï'hqtyüáÈàLŸ È §—7?Ó' ‹H.gú^#g$WuV_‚ЭY¼¼ù™>A…w)ÆG$•[k«½w!=b¬/[?‡ÅŸ.[?c»a¹ä6¾ËÖOݦ×ËñÜÇ-Ò(d"…®ß·z"rXf'Á±XJ‰õçþéOÝNl<]øÇöò*mï„×Þq ŸÛÊHLÃâZ7enƒžût›mC“#3U¹ ®ŽÌ}Žä¨XrœÎsÜœäÈÜçHiD $ˆñæ™ûtW·aƒ½º±Ñ@œ½¾rêƒÄìÄó¥a’ƒ´ì‹[ÚðEÙ¥¶„ÖS^Õ&Ëø¬»$F|Ls–“Ï$´.ª%Îq¶kê$ÜM¬û'Øú™–/m÷¼4;‹–B0HÍSâx(‰»÷ÉwÂn›¥Æv`8cÁk³ïe©Ù£ SÛ¶K7ä©?§_î~® ­té·<¯ípê™ý5½ÂÛívø5¦ïp|š]÷ÙçÚ¢SërŠ›;ï²Í5pŠÌ¡ÛæúOüÏ?þ÷Çÿœ|üð¯?|øøãá{buü°Õ5¡#âp’ŸÕNz_ýñÍõ½ÖÄ{|g}†7dáÝV qŠR]Tve:îsE?<Þdޤ=ã†w|f&uzêŽqÛõü&÷„~ÀqØ8ÂÏ º;Ò ÚáHˆófK'%’?K莸ëÜ¡{ïúnw3Ûl7 ±}8€àûÝ×o\‚Þ+ó!}Va?9‹¯§_÷»ÇÇìÝáÛÿ=f‘6CÎ’Ûe?`Ìøi¿ž­Îôã WݰÜ$ÃûÝâ®›/nfx}ÿzÙÍ’Ýr˜ß¿èÖË€,Îï»~¿À¤¸wC75|õòÛçÏ_ÐpT8þ·¿=ç Àä±;¶ÓqþMïÃÞÛäSáÅä×JÙÏ ¿¦Âk1$0á9 ^òJVä(¿taCGŒó1„5 s¶÷ÿ«üÒûÁiñMŽÿ^þæ»o’o—¯ú®ŸÜm{O¨I÷éqØÿÎÔÇ¿ÄxžÃ ¿!y<_á¬ÒÍ@£yñf3–ÛÍÞ„ëŒ=­¢ž å/ §v‘ý¡_àÐ×Ýõâa°'ðØŸ’7ýòföOŸBmž5OŸ<5Å‹'Å7mýäëUùäN/.ž=m¾~–?ÿÇЦ¸ž÷ ÌÑvó›Ûäa½Úì¯ñË›Ùý0쮯®¬Ôí¿X/çýv¿½¾˜o×W°3Ëùâj·}·èwÛåf¸¢\­»åfÆ^T^—8Ho4d½´÷fƒ»[ aþruK˜¯úïºÝïÞZIZwûaÑã ]üj‡'Ç&EÐØû_ÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout7.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout4.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  
Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout6.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout9.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout8.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7-ppt/slideLayouts/_rels/slideLayout10.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!i¢_!Ç,ppt/slideMasters/_rels/slideMaster1.xml.relsÄÕÝjà ðûÁÞAÎýb’¶é5½ƒÂ®F÷O>X¢¢v,o?) (ŽBÀ›€Šçüø+æxúzòÆvJ2È’ÊJ‰N6 >/o/; Öq)x¯$2Ñ©|~:~`ÏßdÛN[â«HË uN(µU‹·‰Ò(ýJ­ÌÀš†j^}ñiž¦5ÓPÎj’³``ÎÂ÷¿ŒÚwþ¿¶ªë®ÂWU]”îN jûNà;ÕÕù²Ü4è$ÉtÞN»ÄóÞ—­bÊV!Ù6¦l’eù’4ç¯Îò6Coß,äX”ñè­ÊC²lÉ€•3+bÊŠ`fqC ¦¶‰™Ú&˜šëã=­Y²­cÒÖ!Ù>¦lÿ'£³ßoù ÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout1.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout2.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout5.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!ÕÑ’ñ¾7-ppt/slideLayouts/_rels/slideLayout11.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  
Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!·cÔ·Y"ppt/slideLayouts/slideLayout11.xmlÔX[oÜD~Gâ?X®Ä›ãËŽ½^“MµW IDRÞ{6ëÖ7ƳÛ]ªJI,!x@}ª Tâ P´¯´P‰ÿR“†ŸÁ™±½IšMIÈ¥ðâ]{fÎÌùÎ÷Íœ3‹×'a Mý8jÊú‚&K$rcÏ6›ò¾bËRÊpäá ŽHSž’T¾¾ôö[‹‰“Þ2žÆ#&(upS2–8ªšºCât!NHmƒ˜†˜Á+ÝT=Šoƒí0P M³Ôû‘\ާ§¾Kº±; IÄ #”˜ÁúÓ¡Ÿ¤•µä4ÖJR0#F]›&à-Ã6|VämLdIô§chÑå%€À]<)Â!|ØúãˇOóí¯ò?òïóì‹<û%ß~ôεIë݃Æìó|çq¾ó Ú……4Ù „p[Ñø=š¬'kT^¯QÉ÷øDå²Z6”ÝÄkÝàúÊðÍÊv&.-b€“&Mâ;åO„2a’[|t¾ºÃÕ9}ÝaoNoµšV0›¨‘wǨÜ9€ä^’>ó²ŠÁÔrìÞJ¥(¿9…»îʸ2Î1àÓ%C©ˆã“¥˜úÛ"ˆå¨¢«@« Ä+7f8Y–Ñ@Z–QGVÍ> ™¡™uÑÎ3mS7 SLRY‚I Ó‰Ã&íØ›rÀ?_ˆ+§US&øcø-Ì)[gÓ€ˆ°xØ—àoB?Œoú‚ 溼‰•Öd lY¼“H¹±: Y' t\Æ—-åÙwœlqö\:ÊÌ'@×½ç÷þzôxïÉ·‹H<*ç%‘·†)þè,Óó¸`ÖžW xlN&Dí8!iDʳy¶ «šyÈ5•mqg (°âÁ¿b Ç+DÑ"*šž0ȬÎ&Ô5/–¦7lÞ~Y|KÁ8˜Åþ2øs(0ùöÉü)h ¸j]Ò‹âõþî®ô g_çp)™`JÍŒq³gÑ!pø`ï‘EÏJ0û?TíÍU+öÎ×þôº4F¡7ú_e^%Pü\݇ª3îåƒþüýëÏ4AèóžiƒLà3È~q0ËÓLä"íá©’øs8ÿ'iufWYŠHEŽç%Èxy^z§Óí÷=­£´»¦¦ V­­´kv_ÑíZ£«ÛÈè7Œ»r™’y˜懤ïoŽ(Y1ùBÒCÓ몡õƒmÖÇM_atÍ*ºyv?ϲ*Ù:)qb%p…Zâ]ºpß«+‘¥¿ÿÿPK!XYJg­!ppt/slideLayouts/slideLayout3.xmlÌXÉnÛF¾è;ì™7qƒå@k¸¶Q'0&GnŽ©AGÚ=ä¸h€žº·I=´Y€¾KX§yŒþ3$%9±SÕv_$ÎöÏ÷o3ß?ëWÆa Œ0Iý8jˆÊš, 8rcÏö⵫=É…”¢ÈCAá†8Á©xeãí·Ö' ¼M4‰‡TQê †8 4qjµÔà¥kq‚#ëÇ$Dšd¿ætd‡AM•e£"?Ëõd™õq¿ï»¸»ÃG´Bp€(àO~’VÒ’e¤%§ †¯>‰NÐ6Åî{y¢À'’t)âèîîž¡:òéÓ|úk>ý=Ͼϳß^~÷ÅÑgOò;‡|^š\%³Ñè]’ì&;„/ßíÁ÷˜¸RŒX+Êi¼Á4ø¨½²|¿’„œqŸ„ëÈ»ã†î›°_X„<¦‚[tºó^w°}Â\wÐ=av­ÚÌ6Ï'…F¯«£VêäÓ?óé7yv7Ï~”™rÅ 6c÷F*D1¨Ë¬Phén*™Lu¶K2 _PŸ¸œW r³TóSnÚ ïÌ ¦ªjŠÆ­¢ë²aË¯ØÆ4MU‡NYHÑ U6ë|“JlRˆN:nÅÞ„YvþÁ(r1„-e+¤t—Np7| (؇¼ ãáþGЕ~ÒaKØsûßE`å¶åJðúq‰`sä€Ià„܆~_÷¹„±l½Ž¤vDØ‘nò6ޤk»½!mŠRuº‘g_çÓÇÌEÙ3aÑSùG/üqôìÞË=úŠA ßGÞ"ˆé°ôö̯Èì`ÇÊ~ܤ̷§Çx¬Ì®ìÓ|ú¡Íî yv˜g¿hÞ~¡•gL•Ù²² Ÿ3sªI SÅã™âLµeĘã1Qåàñ8«Ë²b™¥Ã‹^&Îö ™'ÅYˆÈ&O?òàDcŸ,Tö†[pls$ ÑGo1œÆïõü `sù©ÛF(€ ³£ÀhÑclCàÑÙdîß90VìÄfÁÌ3BeQ Õë& s/W±V—adjrm×VàôX®±B¸ c WŸÃU4Sa(–3/ÓŒÀ ¢,ñÖðZªÅœ|ùð2%^cŽWU-0ïeÄË@–xͼ¦®-Ÿn«Œ²ÄkÍñ2°ËçÛ*ñ2%^{¯Q7/g¾1ÅI¼@N8•`èá›Ñ®ÖS‹…ËûMÔâ, A¯‹Ãoÿzúå©Ô€ßÃ祴m€‚~EŠ›1snaö±Ë]FNH*ÆS1F~W×7opWô¡˜`ÅÀ­v§×³»r[ju겤7µ–ÔÒ¬ž¤XšÝQ,]íÙêm±$Ä¢˜ú!îùûC‚·‡Tdynª²bÖÔšjÎÙàc¢WÈÿê•wóì~žeU=ÿéÁÿú”^þxˆÅÐ(¸à¿ÁÿâèZИYqçy1öùß÷:º…ê)4šW>çÍx؆'Z’Ót º™2†iBÒ4©m¨MIoé©©ºÔÒíf·ÞíZzÏš¥L tG€îB2åùÁãwž<ù¿ó„WÕ3Ô›)Ôd ¯î‡ À[­–m¨m 
Î EïIzÇ6¥fϨK½º¦ëí–ÕlkÝÛ r¢èŽK0íxß+_] óµ—’ÐwIœÆ}ºæÆa­xr©%ñML’ªxuQäò醗 ªl›²­ÕUž‡€PòНB ]ìÅ„×hù%Û#~ÝÀ#$”ЕÀ³ä›:ŸÂt¯ž™6þÿÿPK!Oùªƒ?4!ppt/slideLayouts/slideLayout2.xmlÔWÝnÔF¾¯Ôw°ÜkÇk¯½?VvÑþ¹j•&Q0سYÛ3Ï.»EHa½â Ñ©W­DÛÀM/ZZ¤¾ n Ñ3c{C`ƒY•"ggCWÁvêf¥RÓ#Äj±Ÿ½É~2îoá˜çFÿ“Q@“Ò}k”áÌÈÝÇ]â3 Ñ’Ë{ª"± µ q{;¡¯Ä(‚‰lþw6ÿ1Kogé¯ÙÙü·,…¿[òyS®NèE†±ØO>et‡n3ids²Í”ÀF cª^¼(–Éa Ëà‡þÒöÝÒr¦Cµ×‘È(Ó– ÎÄ6!O¹âå“ÞѬ7ÚZ²Ö –¬ÖËÀƒÅ¡À=Í#z5³ çElc\¾… â]I”˜@¸…¹ó‡¾_$9Yœ‹c1ôåiŽ!|‡°ËágÎÅÉŒT9–žJ–ÞËÒð[Þ9œî‹0̳±ør%¡+ xUÂI¸ý]5;(q¸ý:ârü% ð(ý’±®*¡ž(/%Ëë.2s‘©Eæ.ÉTó4fOSBßAòsz9P2±>x ªï¨ª¨À(ëýeý/€²Ï(¸öÏïr±ÊËåÙ½ŸþùëÛ¯™Ð Êëý­€Ïá þ„ ‡*ˆ'qÍÈ‹Xê¡Î$† ë„"»Öë»nsPéiݾ]ѬNµ«u« W3ÕfßhX¦Û4¯«…*ñÇ<ˆ°ìŽÞsu%ºÂ¬uÝÔÍúÑgü¦Ï‘]»d7KïfiZªœ“tƒ,Ƴ’<ä,gù«1bà 'z…‚â¬-ëgÙ-|óüî/‡w~?±Vì³É¯\,Cw¶9Ž–")oá—L­^‡¢éZ¯fv4«kõµŽU³´®Õì ìÁ a¹EÉ$aàã¼[I¥<ÝüÉÓý?ßuH¥]örÐXm$ ç©l±Æ z‡kÝn³föð­0,W³úͺÖqk¶æÚUËêu^upB¦†åx ˦ó3¿h~aò•†5 ‰Ísð–E,&²$¦Ñ) èò6xÄ¡”⪓߫“¯ªòÓªü^ªNžUå·âþ˜X䇔¾$¾Oóƒ|ŸŠõ»Ó}*E!·×Ø‘ÕæA3Mܦ0 .ÔW–µ–°7Ñd{{i¶%{sþ ‹°GfL êÁ`9Œ÷ΙŒ‡çÌVÛ‚ÅKø¼öèuwŒÖ3aÑÎÕ+0XØÉ‚;…”fà.Bíe°;mmr×ù[ò±TS0z(ØhŸ‹È´K Ýò"&¶k¹ZC75dXgÃã8Žø$9šVÏXu¾6{lÖËÂ9î-øä`/.Ø›ÇDBƒ=@?@ñI%ÙíHc^S·±r}_–pÌvÄ=I•›Pc ëÇC 6ì±íªü’'$XùRk™gÕ'O_>ùåôùÿ¾ùñô雀”A,š÷’4ÜÇßø7¯ç1Å`‡€¶>ÂeMôÅt›KºªÊÏV1JÆU^LnÕ¤C±@&·yR'ÇeÈ×MG·öM×µaÓ9˾ Ô‹ôì;–Ág×á¨kK„¡ÎÅ62ç²Ï)§±I(%˜îˆbŒÒ¶q‰ã# ²Xé3Ù…íTdLHFÀ,²8 ý(ŽÅ ßCI?¦ÒǰÍøÖDG)«GK[@.Ÿ,x\±¬¶öá²ÁÇíÀ¥±„Š,‡GFZ?¼dƒ×\âíèH”ìúáå ¼h‰w‘†ë˜£l[+€]Ãe±~€9ʰ½l.TîZ¦0GÙvV;È\Óšã(Àî0G»¦EÇQ6€;+€mË{ÿúå0G)¶êV;pôoN:¼úq^Zñòñ×üöyU>®Ê@î!ý$uUsõc^…˜Ȭ¡-Àñ>uBPÔßz®¯!üâ@pÃÅÐwKAtîg)úFÐpI¯?ðýÎPë+½¥)¨kö”žéúŠîšî"Ãï÷åFÖ†˜%ÄŽ&”ìM˜ÌSákGCÓÕP g)7ýå¡Õ²[•ª²l•ìE$£« yK°üÑSFà¦&ú¤#Ðxi¢ßbíEW:Ë?}wúðç kE´7У¶½Ôê¶ ·ß$çFR{¡½¯®dlÇ¢éêJß6º ê¡ÒE6Rz¨ÓZá‹|wQ2¨d’º+©”ÇÏÞ{qüë›®ÑF´‡Йïвå¢GŸPèïõzÛè»°WèÈWРã(]ß¶ß2ê÷ÜnßÞ—sy%âÈ⃰9:Á׎;’( Y‘ØF%j}n¢æÙ]Bó š8:ѵæüEt   h– ½%PІ°E .ðƒ;ˆé‡8ß›ÂG {pÒ…Í åp¶Ã»§3S¸ïíYÑößÿÿPK!A+,]¿z6!ppt/slideMasters/slideMaster1.xmlì[[oãÆ~/ÿ@°WâV$ÙJ·p6F¼ùc’²¸æ­äȱج€ }ÊC¤M€< ¶›¼¦ièOYe“þŒž3J´M_`{ã] kHäÌápæ|çœ9çíÝ×’XÙ‹2ÊÒžªßi«J˜úY¥»=õí#ÍS•’’4 q–†=õ0,Õ×Ö^ùÝݼ[ÆÁ¤¤a¡ÀiÙ%=uBiÞmµJ&¤¼“åa }ã¬H…Ûb·ä;‰[F»í´¥ªx¾8ÏóÙxùázæO“0¥|"Œ …ù—“(/åhùyFË‹°„aØÓµ)­Áúüí8Àï]þùV8V¢à´ÔnëêÚ]Òeë ‡q¡ì“¸§îìêjkín 
aq…—ùƒ" ñ*ݽȷó­oüûû[Œ CªJJÐ/À:„»MAŒ\{|WŽDºã"Áz˜! xˆŸðé†Tñy£¿hõ'ož ëO6NnÉÀÒª—âªøŠŽ/ÇË™?þÏüñWóÙGóÙ?•ùìÓùìÉ|öÓüñóÙçÐ4Ÿ=‚[…©­ššÍ73¯TÒ ô€êáË…É—¡NðõùD¡‡9hŽF4…ï„Ù¦•|Ét.Riʲ]0D¦.õÓ«ëÌ3ŒŽƒý¨9]·Ì6Üà\åEI_³DÁ‹žZ„>eÆAö7KÊE¥³>‘¼KYpˆíÀ7Øx!Lµ··!B$t‡"ˆ°9º6Ÿ}0þð²Ì?øî—Ïÿõì§ÿ÷ͷϾû UI™BÙ{Ã4Ø"yë"¯JfÚ•Zì¹y6©YéìÃùão™Q~Ôl¤â~Y#E½«"Š\ÆVu0J´[†štðš±Z¶awóæ+ZÛ…ìY‰÷™¡³å_‡½.YÄiö ¶Ë\†Èy±huU~ôë“'Ê9mÁÂ!+{‚ƒö"~þoÛk“>YQÌ¥n¼¢Ìß^QÌo¼¢¬ß^Qˆ§yÈùMùÚ\Ï~žŠ‚ðùí©–ÜSùôëŸükcÆÇ ú²›i@!óxR&ŦÊ" Ëû0;¼`è˜6üÙT Ý4« ÐtlݰoþžZKÙ&)Ó<–òíÇ:n¢$Þ…Œ-V±-ǘs¡:uL,°­Ìâ(EqÌn°¦[Ô:ô€—@4J)¯~\{‘W…Kì–Æ}€¿‰uÀ.Žá×"Åw±ít¬4zo¸>u6ÚCm°n·5«o´é4Ý3;ëºg£Žñ>dɬ  i”„£hwZ„oNy.~élÖhënËhîbWƒùá\Ÿ£sÙÒ¹æ³O泙̭›ª* /ëcc¨ ˜UüiJ ¨õ…Ÿñ¼ë«óú™©–¬´Nv4¯c¿ÔŽ&‹±›çjÏцʆ‘ø;#þüë'ÿxöñ÷› ¸—5d`«îO““l™ùÉ…ö ǶÍÓmùeß48­pó,¹Ú4×…m£¯kCÇèkÖÀZ×ú–ci«Óß°76T·]Çö€S“ôÌ%er¶°I²úqñÉ P!#¡@‡B‚ÑSƒ=¸ÚÙ5° 8EzWÁ\ßÞ$Ä…l~ÞRɘ²èÞ ²Å–-°Ãò.G¶@¼šÄQºÊÀ/Ugñxƒ¼Ât±á›ä0›Ò{@"Žla‰Œ¡[®å™ŽÕ¯‹¬oq/Ôg“,d£ YÁ@5Ê‚®ªqE‚Ý( ú©dE¢Ð( š«dE,n”… ’uŽi¦¦´]ɺgÈ•,ãhk¯ë.ÉvÎŽ7ªqu–úŸ2p 8iÔKªÀÓƨ–hŒ•f·[DÂ+2oLˆ¡ÈÎ6äÝŒíE¼)'qC²™ °<À8Rq " _áekšú@‹ÃˆÜà¡’çþ–/²r¶$Ⱥ¡MôîLïÃI+ Dü®ñ¼Xë /Ù <:o5#â{–k6k–˜‰ý«ÉC-¦ˆäÍäHGHx‡_éðKìhªê*†£`Xé;!ÅfO5-£ƒ ‹Rˆö 7M6ÈBèºÁUrÎö £ Š(¬_¸šúEDbUÉIš• 5 zm§mÁ·ü^–GÔŸŒHÅp2aBƒ?!E²D»3B kî©O}ɵÛ9Ïg®ò´ òTk€<ÕN…œ¹‘,‡ÕX1NV°ž Å(„rQ߾̰þåtX ïº<ù aE,Et4°ÊÓ¶%\ ˆ·×3ÜÕ¸¶}…¸"˜Wk Wqtu;q=Ë_qƒ¸–÷ qE0®öW£m»Ì*qøÖøë<# ¿°"–Vg V[·XÔ½…°ž™5aBwãÝÁ¸ºK¸v\%+\ë?tÁè…ÀÁ¸z \yMSK‡oM~9üÁ¸v–põ<KñÛ™6•¿qÁdôçE‘w3: ‹Š°€ò|‹£/ õåŸÝU,ˆ‘l/ƒÁ4®¾0j¢ øÖñBÒòŸ+e±#Ƭ%\„KNr¥¬š²*[ÓÅße®üðÈoH.$Æ" Òu£„K‹æ‘Å81l÷i•È kÉ^¸ykW.îÊ*®éö¶çòaA²{O²{ϲÃß²þÍ>¿b°$ºcL7ýãh;ÚŠÙîþV,¸•VHåâ‹Æn€Á…<¶}‡KBÖ~'ö×V^öWEÞ€~Â&dá}"Øù¢}²jw7'`ínkZæ †…¸G¹EgÍѸ9ÙáßÙá“,½Ÿ¥¿ êи| ë¡};‚Ì¥^È­´7ú\&5>%ê E$\âá—ÉÜÂñ ¸–ùŒì×Cg@í¿Ù"²¼„l“‡™_@{dpø€(Üîù®îº,¢Y¿‹¤O·DydÝã@º¹ Uà“†‡TIá`²–¥?d‡Ï©±é aÔfȇWÿ8~ñàߟž?û~ÌÒ”ÏΦM£Êo7ʃÜ!qå/z(&nòÙÔŒáÔ"¾BV†¤“»ŸÙœõ›×9~ðûÔZ1.Â0ïßèù=9ƒ/âÉaÉ• MM•†V“ôºÞ”jº¡Ku½Zk•[-So›Ã’I<×Áhw!•òòàùG/þ¼ì:a“J~BóÍõˆGÄ÷½&Òwêõª¡5LèªÞ–ôfµ"ÕÚFYj—KºÞ¨›µF©uLŽTݲcÌŽ1>qŠãX‚Á›®)&ü«6beº±·-®-˜@ÏB¨Ú¶†¢Í>cLpú…Ö`Kœ÷@Pè „ÚÎÏÖþÿÿPK!Õy„‡‘¹$!ppt/slideLayouts/slideLayout5.xmlìZKoÛF¾è Ø3#ñ!’,–l-\Û¨°¦(‹ _%)Ùn 5‹ =åÒ môÔ}$顇6m€þ—°yüŒÎ¹m™6-;N 
0dŠ~;3»»puϱ¹±„–ç¶yñJçL×ðú–»Óæ¯mõ爸}b{®Ùæ÷Í¿ºøþ{ ~+´û«dßE`¸a‹´ùaù­Z-4†¦CÂ+žoºðÛÀ Á×`§ÖÈ.`;vMª×ÕšC,—Ïžª<ï –a.{ÆÈ1Ý( L›D 8´ü¡ùUÐüÀ Ÿ>¬R´ïƒµÑ®·µ·µë­o_ç9Æp[äÁ~cÓîs.qàÆËß¾~ý,Æ»¡¿˜&ýÝø›þF€Âk〳úôáì!¾–ý‰áWÄà¢väñ†DZ{ƒÀY\ -ð·×æ!`ûô"-s/âŒô¦1¹k ב5†+ÇHר A>(ÄÚO-š6Gbæ$ÿ$?$ñÝ$þ•sãÒ' ¬zÆs=0—z!µÒX3Lj:År™÷­È63¹ôGt “Áµè³h¯ãõ÷©ýÛðo’–F›Ñ¾ ‘€ë±-bH«o>M=\¸ FÅÁVÒUàbvcäXŽwÝÂÈÙ„æÅu"|¼ÁsÄŽVñ»é ×6!Oœ¨k›ò( G´˜Äß'O©kâg\ÑCÉO^>üóų{¯züâÉw  BaÏÆ5Ýþ hZ}xêOÒÝÁIÌ#p™F®<~r¿øNrð˜jßå’øA?¥ñûCiߦ6HX&’f*›%ñ¥A82ᕆÔ³^lÈ Q”©J“¹¯Ô•º¨mÑ P妦¢Îà›}’Î4æ&6q8âCxh;…,NŠlq V1ë,·ÔA/éèÛ£5àGT$b\øy›—ªé63³0åðR‚I™2«*¡Ö§Q)ÕÔ”'¨MQA ª Šú4*…ÊP• ª(k¢J…+Á¢äaP¬ ¶Q€Õ%u˜–be°êV’tPáÚR¬ V+ÀjŠŒópVm)V«O`)fõã[Š•Á6 °jC;WÈ(L1'(é 0ërÒÃÑ/˜8 ÔtqÎÂJο'1üÝÁÏ/Ë)ù^ÿìÍuÆwå…!±¦äD_÷è`zQ|ÁÑP– $jŠ®5N @¹Ù!­¨DD+†xêÕ9áµ² —Œ†ŠH“/—e ËÈ¥ ‹”Ë2eŒQ”¥ó9—e Ëh T– €,ËíRY&²,aKe™È²,,•e ›¦+MпH¯¹mÿ£ÜÃ>'àëý¢Š©WqG ¥“È&«Êò*-«ÚŽ©Ò¤³Àž¥øƒ ê[¨)’ÔQG! ¼óŽ’ß¾£²jï¤ÀWЛQÊÛw”’¾ÞqG5.ÓQÐ$\^cÕÈ ‹BõR^U`¸f¯*ÒŽ9m¬h…ñÙˆ‘dE& 6Ï•‹ UiÔ%°š¨²6KÔ ô˜·Ym~ÞfA«;o³Ú¼FQ¸Ùé4U©«WˆJOP–›š°ÔSB¯!+J·£/uå•[`²/*-#0ñäËGýìÜœ:5ãXFà…Þ ºbxN-=~Só½]3ð= OàˆõìϘÀ. $É2Œ¥7Yª€–x&i &Г3HÄvð ñ×ǸF †`Áª‹·|8"3€ŠND¨íìÈÑâÿÿPK!oK„J¨Ñ !ppt/slideLayouts/slideLayout6.xml¼VÍŽÛ6¾è;êY+K–ÿ„µ˶ŠÛÝE7yV¢×J(’%iÇn`³º´§œ‚ ÐS ô7×6m€¼KÔMûRÒfÛì)êæ"‰£™áÌ|ó‘³{mk……ÌÚÞN˶0MXšÑã¡}óFìômK*DSDÅC{ƒ¥}môî;»<”$ÝC¶Tø 2DC{¡]W& œ#¹Ã8¦ðoÎDŽ,ű› t|çÄõ[­®›£ŒÚµ½x{6Ÿg ž²d™cª*'¤ ~¹È¸l¼ñ7ñÆ–àÆXÿ=$µá­ÊÁ”ll˨Š={Ù'G$µ(ÊAPž>/O¿.‹ÏÊâ‡òþ“òþs£!ù ±Ö¥«÷?â‡Â•¥ÚQíÀv뵚YRPƒ÷æÇ'®ç"í¢jb­‡6@·ÑO0B!^++©„É+i²8¸D7YÌ.Ñv› ‚óMu^eôz:~“ÎÅzXÞyr•{,¹--Ê ]]…*ËdÕøÔ©ë]øÂº€C­Wý4eiô%”ÖÔL­#–ntþÃÛQH¤:R‚M] z‚sx ·—y–³[™Á‚ Ýó·sýжQ{f©só8« Á8RXÊâ«òô©N¶xf]Ìzàåã_Ξ=øó۟Ξ|¹ •Td½/¦é!裳½® !vH»É>+,®F¤Ý òòÑ7¿ÿöEY<*‹!ZvñXwkq¢ƒ÷·Qª JŸë™ÛÐßÐ|ž©•JúŸ0›Ý4iîN¦q<˜µ&N4í´œ`ÜŽœ¨Ý¯ßL½~àÇÿž]7NŠVYŽãìx)ðÁRÙ[Þoy=×wýÞ+l!>íú-¢4è–Åò(šF¼ äö6@ž+Q¡üÉ …aQÝx ä|‹ìœWPâ;sˆþÇÃïÏü|%W‚m”®Îýe~i% ·L™n¯¤{Î¤ë ¦Î8èN ƳÎlÖâþ9e$ÉRL!º­0åÅÉÓ÷^œüúóĆÍu wßž„—›[p)àx¿Eƒ®?éÃYá±L=gw;NÜiÁ$ê'íÙ=H™{A˜l&‚Òz2ákÓDž%‚I6W; ËÝj,q9»ƒg™™L¼V=Þ¬³°çy¾×éµzº ^ˆ²y›hA¤g vBćˆ¬àÌD! 
R@´‰q*ë *:÷fýÿÿPK!Û›X¼dp"ppt/slideLayouts/slideLayout10.xmlÔX]oÜD}Gâ?X®Ä›ãµ=ëÝ5Ù­öË’ˆ¤¼ölÖ­¿Ïnw©*%±„àõ©*P‰'@Ò¾ÒB%þKM~wÆö6iw£¤Ý¤ðb¯Ç3×÷žsï̹»zuøÒ˜Ðċ¦¬­Td‰„NäzáNS¾¶m+uYJ]ìG!iÊS’ÈW[ï¾³[‰ï®ái4bØ 7å!c±¥ª‰3$NV¢˜„ðnÑ3x¤;ªKñM°øª^©˜j€½P.ÖÓ³¬Ï!½È$d¹J|ÌÀÿdèÅIi->‹µ˜’̈Õ']bÓ¢`ØöD–Ä<:†MnAèΖïJ!` Ûÿ+Ûÿ1K¿ÊÒß²½ï]™´ß?züóóû³½o²ôËlÿa¶ÿÞ‹…I¼M á&Âñ4ÞŠ7©°·>Þ¤’çrû…]Y-^ÓÄcÓà‡úÒòÒ¶&´V±8I“¦ tNùa‹L˜ääƒÎ‹Qg¸1g®3ìÏ™­–f…Lˆóˆ^ G/Ã9“¤Í‚ËW`°°97)Œ \ŽB¥³>.mòÐùWâ¡”sÃ<æ“b^þRÀRÎOZ›t"wÊãÿ îb[~¶ØÔ'ð[`.ÀÂQàÑuOpác^×±òѦ,aŸ­‰g*×¶ .Öõ †º)f­,ý³ 9‘>•ŽÇœí=‚”8|zçŸ}¿ H2 ²ø. ÝMLñ'çùOêt—£/ƒ3΀,EÔƒ¢Ë«K†¬‡”, ?‘|Û+ wîÝ…¡äóº¡[sͧóÁ×E°4©!à–Þø®ÞqYÜx3ô‘>>aºËÞ@º}ëÓ–çØ;חoÓ³è˜ÉKaÞ?¯¯Ï¿xùøßž]<ÿE L¶¯Ø$&(ùÊÛ£7I dëq«1C¢G—GO™G¤O:yšN~QQæÉ9$W:9EÉU46d •µ‚ ÒZ„üƒäà¡·^HéŠj•Ìý<Ý ¥) †Ýª!µ4Ž|ï²ôvø 1(Ž{@ÊlÕ\t•!¼òó8Ĺð¨bHfPZÅÀYÂ*xêLƒ$Ç+ÏðªŠÆÒh%<œ™Åà!HާÍ𔲡`²®& ¦ÓQrÀÊ  <° ¢ä€ú x\KBDÉ9@Ccž[CeDÉÍ ¢­î”‚ %¬ÎêcM§ Êbv»A‚Ñ8Á¤“/Ò³gŒZ K©¦Œ1u]ªÁZ| ¤ß'^/gFb¬~1«`}?dâÕ†—Ÿ……¬R†2•Õ©Y•/ÐŽY‚²–m‘ÞRÈ,ª^Wb¥ÕXýòZ“u”‹!HŽ·&ë(…ßëT7L:¼ pNo”SÀÛãð6@8¼å|$@Ù™vF,¬6Ü]ÍÑËÛº«uš¤ ç°×O¿ÇóƲ>IÛyÙôu)Y¥EÊZÈ]Œ2yÓÇ[åðÖØöà…§ û­v·[í”ZR³])IZ£Ü”še³+)f¹ÚVLMíVÕb~° u¨ë;]÷d;û*"1\ÛƒjI1dUVY ò!ô V({7þãøùƒzrüô;ªaаï:½…cL5_øó4˜¸ ºC„¸×à6K›Ó“ÂÀkl¢’„tø(ÞÔ€ác¨¼tx@Í—K,׆ê๷xN)š¡Ñ<¡¥6+©4„ ²¤ÒLMAa™O²re¾ÈÒ›»‡'+Üñ È3i*šÔ™È1ÜÊyŒ'›9ŽåÀ*3°ê8–«ÎÀÒ$éÀ€Õæa9°ú<,Ö˜‡åÀšó°ØÒO /уÆÈƒt2ð3›Ì}õêá/Ç~?µVô7áF˜…ntý™žœ³ÝŸÇ“£’Ñ ЦФº.W%µ¦6¤ªª«RM-U›Z³iª-sT2‰çÂÚÑt^ºR^<»öâàÏ·]'¬òa)L.×BEl†Ùaâu§V+érÝ„^Ô–¤6J†TméšÔÒU­×Ìj]iÞ“#¤–­Øa#Þì|Ô /_û®‡IØ&+Vè²9s! 
o;qºlÔŒŠù¼º‡áˆ¯£’\4d™Ç”dlŽ+ Ð91+Q/þG›=FŠ`0uVg¯"…CPè „šÎGëkÿÿÿPK!ŠK¶e/!ppt/slideLayouts/slideLayout7.xml¼UÍnÓ@¾#ñ–9»ŽçÏj‚â$F ÒV´}€­½iLíÝew“&T•ªúĉSU*®pà爄øãuÜ ÚJ —Ä;»3;ß÷íÌ,Þœ&±6Á\D”´uk¡¢k˜4ŒÈV[ßX÷¦® ‰HˆbJp[Ÿa¡ßì\¿¶È\‡KhFÇRƒD¸¨­¤d®iŠ`„$(Ãö†”'HÂ’o™!G;;‰M»R©› Šˆ>÷ç¿ãO‡Ã(À}ŒLd„ãIÈ_Œ"&Êhìw¢1Ž„QÞ?§$g Ðnƈlëš:Æ'`°ô ÖâP#(ÃÉñ§“7ÇÊ*Ø:Ç8ß'“[œ­±U®/OV¹…¹óÜI7çócjIà|˜¿¸o•‘;ò¤³ˆ\à@›¶uj–ÿ‚rñTjAa άÁh傳ÁhpÁi³¼28½Tf¢ópìÎ÷£ç_?<ÉÒ£,}¥³ƒwYú4K_eé>,5ënAÌ%l P ç¥À,OÊ[r2ò{ÙH+”%¼Ã‡ )Ї:° P-]ñ•V¥¿ò«rêÑp–3´ ÿʈÜXÈ59‹±bð!w‚æíöú¾ßTz†×¯U §[õ ¯Úô «Ymõ­¦cû-{O/“BË(Á~´5æxe,á —ƒÜÛã$JèýH‰Šé>2î¬êŠå’Zcbl¬¨DöbŒ øæJÊŽ]±¦mÚEÐD2•ŸR™„«ˆ£{rCÎ-r%T²Ÿ…®—«[-ÕÍÒÃ,M³ƒÏ¹–—Šl_…ÈCÉ •Œ—…Ðå)^Å_ ÿƒÎ)ƒyA¼Èže飓×ß¿½”ÆêUÐyyœ\Ȥ’Ixu%So4 hº–Ñ«Û]Ãñœ¾ÑuêŽá9­î 64¿yZ2"ŽBL »+©”/ûïn|Ùÿ¯ëD•KÙÜ¡Ó. ¨J¦zî˜Cïz^«n÷šÐ+,Ç7œ~«atýzÍðkUÇéyÍn¯:ØÈÌrÜ€c5on‡ó¹Æs³*‰NÊ…€&f1ôLFw0g4RsϪ̇çÅÐeìj«åXvM= ȲT_f ¦|jåi1¿‹ØÊz&raLC¡õ”‰Á`žwâ³#9örÐw~ÿÿPK!ÕÑ’ñ¾7,ppt/slideLayouts/_rels/slideLayout3.xml.rels„Á Â0Dï‚ÿönÒz‘¦^DðàEô–dÛÛ$d£èß›cÁãì0ovšýkÅ“»à5Ô²AÞë|¯áv=®¶ 8£·8OÞİo—‹æB#æâÁE…âYÃsÜ)Åf  Y†H¾8]Hæ"S¯"š;ö¤ÖUµQi΀ö‹)NVC:ÙÄõKóvè:gèÌc"ŸT(¥3r¦T°˜zʤœßy.jYÞÕ6êknûÿÿPK!À¥ºppt/theme/theme1.xmlìYOoE¿#ñF{oc'vGuªØ±hÓF±[Ôãxw¼;ÍìÎjfœÔ·ª‘¸TBBÄÄ*µ—ðeA‘úx3³»Þ‰×JZ"@P+н3¿yÿß›7³W¯=ˆ: BRž´½ú嚇Hâó€&aÛ»3ì_ZóT8 0ã i{S"½kï¾s¯«ˆÄÁúD®ã¶)•®/-I†±¼ÌS’Àܘ‹+xáR ð!ÐÙÒr­¶ºcšx(Á1½=SŸ Ÿ>†¿“£OŽŽO޾ô6r6=¼%õ€ÏÄ@3!ÎZƒ öë!§²Ë:À¬íÇ€Éå!†¥‚‰¶W3oiãê^Ï1µ`mi]ß|²uÙ‚`Ùðá¨`Zï7ZW¶ úÀÔ<®×ëu{õ‚ž`ßM­,ešþZ½“Ó,ìÏyÚÝZ³Öpñ%ú+s2·:N³•Éb‰ý٘ïÕV›ËÞ€,¾9‡ot6»ÝUo@¿:‡ï_i­6\¼EŒ&ûshíÐ~?£^@ÆœmWÂ×¾VËà3DC]šÅ˜'jQ¬Åø>}h Ê&HMS2Æ>Äs3:T3Àë—fì/ç†4/$}ASÕöÞO1äÆŒÞ«ão_?C¯ŽŸž˜‘@ÛÝú(w‹ñÂEºHF8 ™´Þó>ª'å±2§ˆÖÃ>Džaµ·–&û¸ÇIevìrïý/å<óP;Ž,)''KÐaÛk5—›òqÚöÆpf†Ÿq ^—º¡Ä,„+(_ ög&³Éò™7[¹bnÔáÄÚ}Na§¤Bª-,#f* –hNVþå&˜õ¢¨¨Fç“be ‚á“ì躖ŒÇÄWeg—F´íìcVJùD1ˆ‚C4b±‡Áý:TAŸ€J¸ú0A?À=¶¶™r‹s–tåÛ1ƒ³ã˜¥ÎÊ­NÑ<“-ܤBóTt«”Ý(÷úª˜”¿ UÊaü?SEï'p ±høpa,0Ò™Òö¸P‡*”FÔï hLí€h»^˜† ‚kkó-Èþ¶9gi˜´†Ó¤Ú£!ö# Bv¡,™è;ƒX=Û»,I–2UW¦Vì9 l¨kàªÞÛ=A¨›j’•ƒ;îs–A£P79å|s*Y±÷Úø»;›Ì ”[‡MC“Û¿±hf»ª]o–ç{oY=1k³yV³ÒVÐÊÒþ ExÍ­ÖV¬9—›¹pàÅya°hˆR¸LBúìTøŒ˜0ÖêïAmEð"Cƒ°¨¾d¤ ¤Aãdm0iRÖ´Y뤭–oÖÜé|O[Kv¿¦±‹æÌeçäâE;³°ck;¶ÐÔàÙÓ) Cãü 
cc^ž•ßjñÑ}pô¼?˜0%M0Á;+¡‡˜<€ä·ÍÒ?ÿÿPK!ân"øEÈŽppt/media/image1.emfÜœXUUÖÿ×ý£CEuMJ&¯uML,22*4TÔ«¢^ óZX˜¨h¨˜hT·¢Â"#£D£B£¢bŠ)š¨Ð¨(¯Ê$5”7£ÂÄ¢bfh¢†Þ¨aêý,ö=å3O¯Ó<Ïûû=¿Ÿ×gû={Ÿ½×^kíµ×^ûœ}°‰H;Éú%÷)sX9‘Ô"{rD<“fL±IÑ¿ÐÈN•1¤l«ª“ Ú%s¯Ó*³ðc§äÿÎ!“åjYÄ¿™$©â‘)äHŽdÉRY(«e¤x)õRºTÖRïjÚ¯seup½Œÿ×’FP{­,'ÉrY,s{Ûäö¶ùWª†‰~@$)‚t4ÉE²~Gqá"Y²k=Í[¿c¸p‘¬û'q­u¢ÃI¯õן¤×ª›!$éÍFŸª©÷wèu %šDòÇõ¿\+ IÊï@’‹dýæqge@‹/¹AS|Šˆ&3FJ#‰¤8‘¤<ö!ý;O×µƒIÚÞâûÍÆÆCäê½ÎçvþáøÆ´zùz‡òýãÁ^îEñÀÛJE~B½ÖÎ~IåÍâépºü5u†A+š¤4õ·hÑ¢^Êÿª3-ÿ×1ÉÙ%â^fëÕgºa»·%[ 9W­Hó›D=Ã|Yd“´‡Eü6I¸[¤|žMJ׋$̵Ib@Äë³I\.c:Ù&yKEj.4Ø>Ê` ØI¹7Ñ&e"Åؤä*‘ØómRv5íFÚÄ©íÁÊ5Û׉޶Iìí"Î$›ŠÔ±IL±H囤Þc°!Œþ"mR_‚/ðÚ$¢§À磠ò÷”HÆLèm)½z{ñið÷™H ù¿ˆ´ÍÂW|#Ò˜Jùø„‹lRÐÏÖÛ®g¨MZ¦A÷\[/Ýxw"o¼é¯pºëà­>Z¹×¤×)6‰ãá!z<ṵ̈Irõ¾æ¨¯´‹iSÏI\wQ' >:“éoIu7º#mÒƒÞý# w)ù³ ÂXv&m)¯n“îËlR3Œúéèë4h]n“¢A6ñ2Ž-Ñô½€Ô™Âÿ±ÈÆ8çe“”Å6ɱÛzÇ¿ýŸ"EYèà;Æe)4¿f—QÿKô¦¢¿²lh†D| e[–íû±%w¯u™ÿ,Ó\)‚4–M²~V7×ch éæu/)5m«Îáæ™5?Çë@h¹HmŒcúíÃ5¦ÿÓÏâ9’’Ìi6ÉœG3ü¶Þ6:Wr±¿ìKìJ±„q¤\¯«©çSi—Ƹ»»<­¶ÏÀ~@«ï#U·Nì»N·Í³±wêÕQ/ȵo¾M\`6]u1c“i˜-7¡ßÂ%èT. <†¼¶«¥^-tZI:FVßGªnÕ¿Ä ·ê–iúÓϲÛ3(I¥N:êUwê/Õ÷´`‡êó¹NÕOªoôAOÀ"üJòtl”1Pô,0èºÒÐòyÐ,£@õ7ÕS¿QT£¨c•Áöhë˜êw<`ö2ƒ¥Wì@º)Ð^Nß`Ü |ädl ŒÙ+{Êr˜g´ï&¯ôÜÒ¶+‚Ÿè(ß9ÐU¬£ņ,lì??ó¾þCÌÅt’Ú¡®{ŠZG1,-]©¶”{>±ÜvÕýBü†-yÎc]¨I;kD. 
nxL¤dëð"‰8Ý:Öìxœ̬ÝãD¢Xë “EÒne-knÁ炱`.˜{3×ãi ¶€=ù˜È‘œ0F†±î&¼ýDÖ­DB^Ö÷ëi7™8âZhO¡l-íÀt°ì]|ôµœ>.’Þ&3>³X».¦Î"bÙȳ2„Ë_os˜/W°&^"Òq9uÀ\°d.ýÍgãÓÙ§øM \†hSG™¶)£nu’2‘[Ë‹4Ñ—ÿ*‘ª™ð»À‹ÆKUô /NÚùÀZPyˇ–¢3C„í‡dB«ôC+![¤yÒÁ0º×Þ³¯CßðØz#k94ÚЗþÚÑ_ßè¿ ÞÒ׉TÐWÞíÐBnÏzt /¾;i¯EE´›Í Ȇ¾Jï¦  ÊAÇ÷"'2hl&”в%n¢°ÔòÔÍÐ%_ éÔÛŠ§RçQäžDý§0¹¾½ôÿ<Å®¶‘’ ƆQí*m;mG£'0ì~ {¹€x´lw ,F÷ˆŠOî,üë:nűþÝj0æfƒ:wÚÏÀ'°åáøÁkÑÙéøµ«£¡øÝlìu~;sƲöf¡g0´$ÜŽ|&tÕ.‹éGíU±‡ûŠjç9£ð¿ó°Õ Y³ýô3†xu.û•±Ü5>ÖûE“XsÒ™øð¢+˜·øîŽ…Ø¾[礮WÚ¿úê(ø)˜Aý«¨ïc=Áæu­Iu ¨‚o¥ÛµR¤ž~2W3îÚïì4‰ú× º{‡¯P>s,D/þD|}sí<ú¹9FâãAK×Gª-u»X+ÑsL#H²~–-HAuÔ·äô#vÍDG' 3Æ(-б\À<<‰=Æ|æÞɬ“ø’À)ÄÁÌùüS)gÎë¾*bõ±Ôéø‹xÖשøÅsÈOf,Àæ¼×Md¬@õí ì&`ÉØè,ƒ…iÕgùÆa# —1n¦¼/¼ÆY€– v®“ç8Yh£ò"ÊO?«îQ”è5›­†ÏflT±óRÃw=rÖ"G"ò7Ÿië%RÇj˜ê'~1¾Èetz¤ÚŒg-c.U‡¨ÀúY:<‹‚Æ\Æ›1ë#±]‡>|è'ãmxŠBÂ~úÑ/Øy<ý‚E`B÷ÁØq ß1ئRØktU‚žÖh}PÛ7ïCŸ.¦û{Ø׌ìôSFžÀúñ>‰¼ëCC'z?¼QÏûeýw0þ\­´?QDÂ#cÑú1¶võ¨Cy¯Þ©§öRf{˜ ë4xå~ÎéôyYQ@ýÇŽDÖwÓD#_ýhæE#´ÆW¢ÃÆ ÌWô¯óÅƈ0¦¡k»ZÌüê+ÈF²~–®íDQ§.…‹èìÃešÍªù³ý0ÌRó 60›Ã^»À¬íŒéTláô9=ÖИû:.RÝKð{Ž‘;“¼õ³ä>Š‚šmÌäÔ}i7z.xÑ`ëó{Èaj 硇—±‹Dè‚ÍçÿÜÇÂW\]?¼?­ ŽŽsüNäн^°ìŠÁ>¸ßª_m£^a<¸Y‡@žîzƒ:÷4Ÿ Æ€Vß¿†ß$ôAKŠ&Y?KC(ð}tè5l‰¦Ã=‹ýß aõƒeŽ£»ðïçk;%¿¦N,õ"I6ªï•Y¯õW[[Û‹¨ø§:r­úñ“Ü$ëgéGó–-VÕ:¥f—½w¬Š UWû¤NóN»hÝ‚ ]j_rJ°Þ.¡—â~Å.¯R¾Ý.YõNi¬µ‹û5§ø^°KäëNIxÞ.i;â|Î.¥NI{Ú.ž·’ü;»äíuІÒï-v‰:Õ)7¡“qƒ]ZOrJõuôkìÒé”Ä5Ð?Ê)þÕv©s8%7Ç.ÍB+ìÒõO‡$/‡Î·É_j—²¯R¸>:R¼Ð.mñ_a—îV‡Ä·Ÿý‰¼Ì.ÍIòÛ%bŸC\síR½×!Msà«É!qivÉü“CRgÛ¥¤Ñ!³ì·Ë!Í3h÷*÷§Ù¥¸Î!e)vÉÙæºÉè§Ö!]“Ï ¦¾à<¯]:ž7XÆòg’>=þÞ! 
ãì’ôˆC|£ &‚ÞL€îf‡TŽDî{èçl»m€ïðU@?Ãì’¾Ú!§Ø%°¹ w|õG~‡T‡œ—8DŽEžÙÉ9†z3‘7y¦A§¯]ò“/Ü?Ï!5ß±ï>×!¹Ýì·Áз<áÎ.öf±Ôÿšýòúù’½Ð@‡”ÿ…½ÙÊÛÙWã´ƒì mÈó!íÓ¯üÑ.±”ÿOP¿Ÿg½ßØ¥>Äëlü öˆ‹?òìÿCÊwRºÜAù;vé©g¯¸¾Ìs7:y‰ç½A»8·Qïetÿ<û÷môóÏ=ž‚~{²JÚ?É~løÏA*ÐÙ6ÉÏĶnCžKË|öœÓÁë©ÏXøÖBw º_Ã;š íÒ–Ës…Dtºš½Ýv‰; }îìtÚ{ß—¶vš÷qÝŸ³ïõóÜAeÃs‡·ÐÅl›dì¢ÏY6IÞaÞût±ñ¼9hÞi¹>‡Ïø¼øàó1öÓy>²žú`Å èŠòÜUæ¹tNŽA¨åúÌ;l\mžÁçÑïTÆçvtí¥ß"è§ìndý{¸7º î½‹î„ÿ³xWUÈ{É8ä/@ÖáÈq3|ƒ§)Šç Ðn|^o°éZƒ^0ó4›x®£ÁìùÁ|Qî>•ý6üD ‚:­ø- >c t[zï!ÀÐI½bî—A¿†úQôßJm·òŒétÚ!Güoæþý÷!÷¹èèAƒyÌ­d~4ž£m"2¿€MŒ¢Ý6t8=.0b;ïn)OÉ`u)¯Ü‹îÇQ׎=úà'»º>‡à£/AW̽ҹä™ó- ±Ë%Ø+cáÃgµ"CÜmØ :K]'F†ö»˜c· óƒ©w¬ù_ö8rtBïYôõ½]ÚáÕÓÇ!õÊS{˜s¼Cº_eLOvHÂNÆ*?ò ãó¶C²¢YW>Ã_ f~ã³Ï2þ7Im:ÿ™Â|÷³ÎLƒç•N©š…ÿ)pJá\»TVâ0WXŸ%â5î/#ÏúÓ™Íc-i¼Æ.ûœRvô?€ÞMv)ßoüy'XW„ùÈ)Ý›¡û>ùÇàçmÚ=m—ø&§ÔWÛ%÷MøªAæ=¬ /°~ìfbîêz×õ:þúøÙ‰®·³4àß·±N‚ÖZûkbk}N|š¿ ¯{Ö¢ZëÞ‰\×TÓ?u"ŸsJy%zªaÝkèß ÿ©¬Ï ‡zäU/º–†à·»?tŠlÛÅ:õ|²NG?€ÿFÞÆMȇüå%¬«ï°Ž‚ÅèQ±¡Ù`zŠÙ}°ý)¦a;®÷X_óÍ:_8ü:_JŒ‰bÎFÞt Â?KÞòZ§êVÖÃ?Á×:|*ú÷².£÷¸;‘›õ>6<…¼wp¿¾ng¼uJ>ؽÕ)éÔ/¿õ™ûUÅØØYH¼B¹w©¸É)ÅÈ@`ö*ê`wNÉC®ìùÄ=ðš{)zB¾´ô¾Ö.)ɬ站C£°»Uè+z+íâ‰f½Ç>ãtOƒ¯o’ŸJý³Žßbßî¨xìë!êÆxmb]scG7:$±ö»‚||^ôó:—ôüÄÙéigžžå0¦ƒÍ¬¾?³ų~SÏ3™uô[ækëÝÌÿóôD»$—8¤49î'>8ƒùÄúˆcÜË 7‚qÜÂz fmuHÆÙðQð˺ßxzÂ!EçÁ×nÖýä{øåbèþÝ!™Wßüyóèæú)‚m£·àd§$ÝMŒ7|1t|Œ×=ô7‹8s#÷çPŽ/`>>ˆïZ‹}U`+Œ‹çqÆãVì°’¸é.3W"7¡ÿ'±û2§”>E<‚h\gÍ¥ÿdþ•w™µ­vWe#hÙã j_ãÿg ç¿¡Kü¬·ƒ5¬Fçú.6ð>{:kЧ¬ÍÓÐ?¾®y*u?þ(×¼Öóµ±nÑÎÿ¹¡„N ôÛ¿À÷²¾:ñë ‡1ÔwÜÝŒ-hñúkäK‚oÝ#üŸÜCýš½Í²WéétH+ö¢cáþ…=@=åE_`»ø‘¸?;ÄǘΜbNú?uHõ|ÞAlÛË:@̺ö3®Æ¶> &\…xŸ5‡5à]b¾ll¸ôcK¥‰Á¦«îamò’'vÍO‡\‡íŸ­Î%f= ßrs-[ÌvØ%ÿ{üö߉¿Âr¿ÀßbM%Îeœ£þŒ_ÀÒ?gž3þŸbÏ­œø^?bN ý~òû‰ÙZˆáöâ×ö±–¿ÁËfï7€ùÆE/ÐVþ–õ ̺±ÝÄ+°ƒAÐÛNa¶3¯c3?W¢gPÏîèzÝVa}[ÖÓÞy4Ü ûRø¼ ;ÀU— ÷ú«¤üEÁרïLG/ï]óY«á±q~ZÝ×à_†Üåös‰-e¯>?ô,kô4bƒçY“g@‹ØÁŸŠox ?F¾BÌp1~l‡Cª.Þ.‡$Ï#¶Ø„—ööæ`îé”7h–ƒm` §DïÁ¦£k0u>ýƒQ`[#1åoá›/‡Þ›ÄÄ`ÙèRÞr%õÞ&ÆX„ï ‘_ÂX¾Çž{íšá L!毸úÝÈw#:›oBŽoé/ÿ –ƒ™`˜Æì.ø£¾µ6©k_î—,nט3q™\Z?+ÑgƉÔ)[˼{Ö`›"õ`ãW"°æb˜0j=Í×@3ŠzÑy¦öq¤ê°r Ïœ™Û??ý,ò˜½÷¼W$~"pñþ% T?S¾™÷`ô]è¿UxïðÑ7òÜÿ™µ š³ðWòÌ{ëÁ帎)øîK‰'âŸçñѳ•ö`éïyyø÷@m/ÁÇQô½Ç`¶ƒÿÂ?k\“wî7u¢ 
Úäu"+ý–ôE—¨ÃoòY¬!ÈÜ}.ëÎXèfÝ™>7úÇ0è¾k²‹æ™¬È®çjõ{2c©çš.#6º•1`àRÆèfô2z71s©-º™Ê<»‚òI¬~da¬“Щž±^D?I¬‹>ôw>÷é·Ôƒ­Œ@7±Û™¼»Èš‚áùØ žƒG±ÅFú 1æU‘¬…lìhÖ–S™×¿!¦9>ûÀ¿›&N┌ ùËûÓé;;ùšñ‹b >1ã’þ}‡žÞ3X2Xô<Ë{›½´a¼ZÁÆcÈ¿e0¸›y@yñvlGñyx¢¾ûhAß»:ô[D¹›þ °Ë!غR›,½Ùâ«<êžM»5\DßW3F Ì%Pß_¹ÀPÏByFs?‹ü8è,ä½¥ùÒ±+õæ!c*icJêšEÿ”¥R–|ï#g¢»ù葺i´Ší,B†ø] ÝIè‡>"¡™¼š˜×9ð<žùŠQ« 6€únÔ“ŒÂ& ÐO"4×1/˜+åEÐM |#²ÇCg õ±ûìesïIòg ßhŸß0÷ÉïÂFRis6þe6ìÃV¡­ñY4»3ˆÝ ¥qQÁVƒQåkÃÖˆK’À,â² úX‰ï£åãÆ¼‘ü$7ÉúYsJóI$­3–M²~V!xH‡¾;;ô:†{š÷ͪƒÇ@rë÷óµ"TüoyH©]!®clüþY<&ï­Ç޲ÙcÔp½Œùú㽘XïaìŸÝSÂØ]ÎÞ…q‹™¾'<ßïe|™ç!îk,.ø˜M?_#:i aÓØ„¥£#ÕZ÷ãÛÎ16à ¿‚e}¹~Èü£ŽkcÁ<«jBÇqøô·Ìº‰n[cÑiº>Ý&Ú&Ždý,ZjŸ ;ð ¬Ñ;ñ7§Þö vakéï]üLcÛÂ|Æ7—`¾ýhüq×?ðYŸ2fßÃÃçø$ƳçÏŒñ·\ÿ…²ÿâœÇ_±‘.—Ä\w@+—{yý‰Ú±cdK$ß8{ìÀYïÜ_`o ¬ ®7ŠÙ‰Gcäõ½AÊßð‰Iì½:‘möÒ:Ä¡“XC ŸíE²Gq£?ú­òÐ÷ø´XÖòïÒû\&‹qÒu! TzN0tcc‘Ôkøß³ :=Ø)X‰`6ÚÃýVaìÉ·Ù{ÐcÇ?!{;¨ïSˆCô¹Qùô~çQÐ½Ç #åqÇã÷r)Çž¼ ¾³tw¤Ú†ÆÎNÆAm#è°,ãg¿´†"÷UÌåOñßKðC­ø¨LâƲn÷Þ`ÌÀú=ÄSK‰ê˜ÿËh¿Ý`4Ìb‚•‹™;/‘À¼mÔ…^É Ì7è·>…­¬dÍy”WƒmÅoå2ׄnõ7c£×á;îÅǰ—»±©›¨Sˆ­ÝÂu¶´û¸\O½kIw‘ {íÒx¥/ý>"ì+–Ѷû‚—è2Ú/f K'=}_‚ VC#ÕÄ‘£é@ÏÍ&¼FÛÒNê‚黬i`-ŸŸ¬}3ÐX< 9ß@>?²p_¿iI ~þdŽ~+^EW`ù+ÐÈ4¨çÃ5¯çú#_DÏÙЂŸ’æÚ“ð†|•›õ5Ó|÷úÞ‚o.¦þÃôC>ó1hq_ÏùÉWWÐ'úL5TÝkÜÐÜ.(WÔ“>D`-´ê!äÚ ÆA+ô¢+E=[§1KïÙ¾rú­¢ýVä{Úð£ºT¾ôl ¢ûYXÿÚÑWl 1'c®g ƒ dòz–LËÛÁxÓ3dú=‹žÉM…çZt—RB›×ÑÍF³þµÞ˸÷»Î5Þí¡¬~ÿDÿØé^ìîV®C¬o7£¯}ÐÉGOï!çMãÃX|#vó>õÁÐè,j¡ÿÐùGø¤|àÞõÈÛÊ5(M^Q¿åV¬§¼lºŽ±øÛ¿=}=° Í`(Œ‰ŸB?vŸÁÛ5ðêwFÏÑQ.íAk¡ZÉOr“¬_r_lÐarI€ÖKŠ6E½ÿ[u†ó™½Žáž¦ÿ›ñs4k@Ñ›‡?ã»ß߈œ¬_E;oòêóÏDlÂ˺RC¾N±Á¬'º‡o"Ÿm}жêºó–Y·:É[}AfÕ›Ÿt$é6ĺ^Pgt[‚lÖϲ‡ã)¨¢ŽúõP¿KÔ˜@Ïrj|ßÊ<Õw!eOcßKØGê”Q'êó/õõú=i ™¹ ƿϼ¿Ä`у鳉>a\¦³Nu°¾LÆ|ßžH[0Œû/ö*“X»Íý°…z5ßÓ)ØÔÃúåe&î,OâNÅÈHƒõ`yױħÉð †ÆÁ'˜FÇ|ÓŽÇ~@éÏû8y ˜Ì€ê7aåI¬ì‹ô{M_«Ì‹ft!2€®0ê¹ûD7¶skï bùóÑXv‚úÌH±‡ûŠ¥ þ }¦õõ{ý@üÀ¹ðG¾s$ú8Õ œf0z{îwǘöº'Ó~õ»Æ |¸N'mÜWy‹©ßŒüqa¬ŽæýÐMž€\ô×¾›‘«}{ú±GHa,Ñ_ÎTÖI>܇¯ïa-ŸÉø1>ú=­çoØÿlƬç —OÑÙ%øüñíŒ{ÝGÔ÷£ŸÖ¸yè»h¸¹@ýͲ­#uWÏéfîdb^ÖÏš;GQPŒå{¸èÏþpþ[ê&ïÆ2‡[1f%g ;Æ,áLt 怌]ÜpæF¸#U‡Ýo0¿ÑCT$Y?K‡'RPG}îߺ=¬«{ð nì¶‘²~èÔï™ô{®–ã™Ï;±Çã vG²ÞR®Ïµµ]6¨qO÷±"õõ;,7í š˜+'PÌíOÊc£ 04€µú%ñ—´9y ½ÖæiX†#uŒªþ‰üèEÇè²sßwø¾®ß®©//c š)wúL¡õ 
¾kª“Ü|¾´úøE‡˜›xH‡Æò‡^3ìÿ+q½EMŒƒdø÷óµ’Xæ*6Ò0RtøM›6)ÈP’UçB®#H~’›dý¬9¥y̸÷Y{`çìÍy«$§U[ïËø9˜Eßáì=ÏÍYÉÆÝœYº†³ŽAÎNä’ó+9ùg+Vpv²žsW  ù2ïó'p¦òiÎdœCù#œSˆ#ÿg@s&³:œ•Ì,áÌÄÑ|[QÄù†>ÐYÏÙÄ¿s>áîÿ•ó[yœÇjqHÉ sÖP²¡ÿ.g9O ç²3íÒÔÈù­+9Sv,à\Ö¾‘5ÊàìXjyÛ會LâÜCÅåôϹˆšùõ¼ƒæ3†\` ˜Ò`Î?äïæÌ#ù.ÎWèý 0sY ž§èÜÉ7œ»(cjÓw˜o7Ò^ƒÎgè7­iœ5©CÁ³©ÿô/6ç:B³8 ù<çÙfræ’smÓù–¢ ¹R8×ñ(ýN¤þÎGŒ£ý:êÅ£‡87‡žòâÎyŠ5œ±†³ÑË`Ε,4gKs}I>޳1S¨ÇY›èó8"èç eM<Ó»Þ*͹•äÍÄœåðlàyz2gån`½¢¯Žµ¬_±œ YMŒ1”3`ö»$ƒÕtÆ€šÏõù¡–§äðÜòTÎç®$öÄ™½UÄ×nιqß5saÔçžš×癵ќYÎþtý‘×s=z?#ŒmQœC%_ÜŸ3%ÜOµ]¨í²¹¯ïNJ@Ý+yAÝ+i=}Ï_~|‘ouOÕ껕—é¯]é{(ՙ‘ŒõJÑO±/:§}Àiètþ@\ݺðü˜=^Õ·æ|›ž‡ªf¯§ç£bÀ¦¯h»˜=]§ÁÌ/ æ|A¹´Ñ®ã!|6XöÏó–Â/®¿žþof}M·õ~§7{_ÌZ<ùæò,ôo†@o,k‚Ëì-"zˆcâØË´²VŸI;ˆ5ÁÌgˆÃ(ï¨ Il´‘8(ø¿ˆXéúYGŒ6±0¤ñäÃS2|¯6¨ïÕ5¯ïÙcÇÓ6‡¾'Hï{ø¶‰ì –Cg2i)Ï¢§ÿ.¢ÿ©ðµ™|Äó‰."Þ»”Xëb³¿Ô=G>׺×ÐóIÜsÏ N£®~§­Ï†‹¡ÕD_ºÏÒ¿ß ïè#ó¹>²;X«A4~ëý;@ÑÈr?kÛ‰Ð{Œ=O?tñ}Ë>k;üC ø2²MŒ÷Gt7Kï>ÍÚ>ÈWÈøD"/X@»°¬?À=bžØÏL<šñüœD{ÁÎpmc®Ñ¿Ëmþ–zNìòdúìà D¿a¸ã±£AfŸVt òôÃÖ3×¢°«!Ľ°7äÓó%iÃÑ›<ƒú|s:JÄyám,cîCÖñÌ/ô'“èw{¼)ðãg¦ã®€ï¹ØûZúÁ–|7#ãBæýFdÄvÝUèIm¶9V`[;¡›Ã<ú–Øz%t¿£?0úÄaì¤~5åÙÆÆ›Á¨ïMy ºÑwb¥È¬kCÄ1ø¨U\»ÐsCÿ.¤ÿZÊ­ý:æñ)ø¼óõ/•µúzÒ`d·ÎPæízæÈ™6ñà¢Î¡ Ô¿×¢çtõª¶»Ð÷xx/d.£‹èäN=¼˼†6è)á&èL3~¬À‰.ÿä¾&^º‹|Òèz•Ã#uõ[9ý[l±µø °õdò1o_ƒ0j7s|€ú·ÜôsõLxÝIÙEæ¾~BÞ‹“ýÆ'Et “¹ŒU7t.£¿£Y×2¸ÏÚŽNõ@Õ¥~˜z56r!1ÁjúcÖoý†Pý‹~S¨ßÇé7†M7B+“˜¢>+ˆ9£ÿJÖëJx›Aýf1ð$~e›ùL¿iÔ5C¿qÌ}‰õû5ýR¿ Óo"³_Å÷Ñ™õ›Éø 剑Y¿©Ôo)õËø½ÈÞý÷¨ÿüý7wçVU•õñ\}¯ÅÔÍÈ(©H±0Ñ(­PÑn†JI%&Ö-™¢Ä"Ãò7»&ÍP’¡R¢QR’’‘‘ab¢ƒŠŠE…I†Š 5T4Có>T4Îç{÷=Ôï8¯ïó>óÌÓ}žÃ—}Î>{¯½öÚû¬³÷Zë4Ò®ãÔó1ôYعB.‚ ʇSiùtÖE6ÎÆNösÆF_ô¿/Èß=ñkd:šóí<+† ?}ËýCC¬„ÿf>[@ù”F‰‡nìOÿŽì]O~4LÄn•g¼|R›{ñÌ¿Åø¦Êg5ìWƇµè òÝM=gÁ»Yø Ó$.„.t#ùÀÊ·&av¾ô‰|gë铚—ÑG G–¢§DGB¿K¤OœÐG££¼ôMóFôÊkÐ1*¸>^m¢Ük±™®Ä×&!ØŠÜŒ-X fŒ…çUØÛŽöë—òõuìB?M2úo󯱫}ûÜI´§»Ý±|”äSì½™ûý°|Žs¦ k5Ó®©Æ'¹v:b[ˆeÝI»¿Áî8Ýø0·ßM;~Änx}k9ü¾Íòyn¹úzá£÷2ê°J„~ï³iW‡9×øPÏ™O9àËót à½áQô×+ñ1ø4ŸŸEðýr‡ß§K>Úòé’ÏvûÊŸI}%ø0Í7¾qòñn(öû|gòWZ>áI¯Sÿ›ø éz5ï/ë)¿¾µÐ½ßøÉ÷ÍYÌûÊGø§ÁƒÜ· ¿’øt½ˆls=ÿ®×q¿ÊÙ‰Scìøª½‰î&„·ƒÛ)gs°åËÞF¶á³´Þê°<ÕôÓóÞÔ¶™ôNê­]ðÌÙM=Ô»—1¸Ÿú:ί×|?í, >Ð~gÅ\ãä` ÿ—ïyY-¬Ï0óV^07~ö¼5‘t+ydwßñ'Ëo“ýφ{˜“þÌT *±ü5ò™ãe»”Ä^È]Ì7½CéÌÑ¿âÿéÌ·g]Js·“ùH럷1ôc.œÆÿ3Ö5_ 
dÌÞjæjŧUÌàÜTæ*¦°?}9ùo¡¬8ÆìMèNnÊ`^TLbùHÕ3ó·bvÆð¿ü`Ó®5ÀÎ1Ì_×YJç’OöºŠáÙfqè™w5mÛF sSŽë tµñÌWÔïÿ ÝŠ±9Œö9’iëqø4Éð%íFÊý¾ÜÄùÏyƒ²·TÝxGw€níÿ·C_‘êI¥üå<ãéí/kEvŠm` iéÕ²1?‰âÊÈo¦½ýêNÊyÝå.úûê¿ÙØÇý™Ô ôæü£å×mY<ù­‚ÏñSŒü®îŸ-¿§sFq³•Gñ`ÛhG}$ù‘ß\˜‹¬¸@ÅåKë& ?lå'9t‘O6ÀñôUX *Ƹâ_Ë?'òÙ¦édÚ!~6ÁëЛÞÝ «{½¥—Ú‘FýäÑ»–0…¾Ð=yÔ©XÃò rƒ¥\êH¨w']·ë8ºdë•Àx;]² +ÄXCgk»ˆzœè…0†Bƒ¬È¾è g"{}dzàGÁ¹Œ©ÞÈU,ãév%ãèÛÛ‹§ §Ü¿ o“W(zP=»v÷æLÃ0“§ãrÆs”lÓ:iGá¥ô×W”=IŒIÙÕ9Ú¸n1ƸžÉaý =<-è¯åä)ëdme¯æ{Æ ãWö{-ÈkÖÈôQtTîÍ9L¾`úþ rL™aµ–¿Ý¾]ä?ÿÄú£ôeÙº÷PÆ€ó9kº7sMÝ»èâ—ÀÓzæf°ëãr0ýü m§²Ã‹ƒ·aÇ_ ìôZ9o÷å(šéäHåˆà°6•îËáâPì¹RÚ+~G„ð'ð³ózHkÞÈù”9 TÂÜfƒâ‘â¸Cç3±üv^PvÅM̉`õ!øþüldÎ-xé\À¼ù|ñÒw´Qñžä¡¸tÅ ü”½…6?Ë­!%ßJÿrȦ!Ÿsò7‘?‰P´ } Ú§˜ÚZJ¥-ŠÛš4²¡!ÿ7ÛZiÙf¶À7ɪbt—ƒV²²},~™L¤?"¨˜ßŠ9¨95}õΧޱ†oêçRø(”ͦ0ÍkäÀõ(í! 挡ðß¶0ç1“vÒ?ånhϥ߯æÜ“Ð?Þ¯0r'›Àˆáðe-e\Åù7É%y*á#iÙ›:â̸ËÁÿoÿ$Ó£hš“#•#‚ÃþÙ²­t_‡¾­’M=ø¦ûg罘3ÒM‹hâÞdy§#xw‡ÒÃeçøÇòoÓ÷VÊgÁK7zrä_ãœ/®ã:X2Ý”^—/c‹ó’1Åi–_tyõS&X‹¬è~ɾ—¢y­‰rS‘…ÑÔ;~S_¨ïÇhͱ úä¤xË]wÒgæù5ýI;“‘¯ ¨LýóËyú"ÐÅhgm¸á‹ëlÞyçY†_'¢3ýLÞ§µÊó‘Ýé†ÎaÝTþÄÿÎi_ÀCžBäGòe÷ÙÉô³žWòùT?W ööÏæÇPNÈžGsºì{dç#{ŸŠdÚßðGö@Ù×#'¡†·É´ÁÇø¢ s®Ð)û£¢‹à“ˆCž³GÒ¯ð5'Îð5‚F&]Hš 3ágÔ•È÷kAëáä“=”ö´n]–õ¥>Î{ÏEŸÁœsýÏ<¿+©Ç×Ç Þ/”Î ã :Ü`&(:ÿ4öTäçZÊwdÄ‚÷co?@í}Y>2#…ÌDè×UöhÎ7v0’/ñ[æî«‚?BùmÈ^­©‰º‘ï¨OÈ Ë!7ó¼øÞL†§ ž)Ô‘ OÁV0ûô’Ý”ÆMó§Œyî³ûìdúYó£t·°ÿŸí§ÉÎZr/;lÅ9*î©°*‘¶/@¥½>/…ÐÞ¢G‘½hZH;àŸP:ˆ0l_L{GrîIÚsí^Âó›þõ¯W\FYKy¶]ʽÏÀŸ!< è“ÁŒ·z–™´ì*ãbàÁ ò_-…ä_W2PîjžIý9ÿ"eŸßKàÑyŒõ— *þ­Ò ¥äãºçæ °iuG"_`z44r]ñk#Aíq9¹Oq²3(§:ç¼D?_Nÿ¯¡,ÚcCëxµš¾ePr&ý_˜Çuat)®®ÖqdhQŽâsËÿ!ï*Ú=~;@èÐ÷F²^¥Î¡ŒË ÐK}îrÚ ] ¹op/ôè&”N«¸·9 øçE>(~enBÆA½)–k+¨ø¼y›é_òlAö¹¯d[€Žw ]a s¸¹®ñ'ÝÚwmç~Ù#ÖSnLí´wýÈõDò‚Ê/ÝTöºO¾³ºOüý¡WCs²4–¾sÆC箣m䓯nÊÛÈçµ—4g}]ožtèÜI_‚q»«”ޤ´äTö\Ò‰¥Ó7º©o?üÍ}ï!Ð#ôPzxÜûð‰öv|hÚa1寬wRß ò1ò¢wVùÚgåZúL»Ð üžËH+yv3užÃ=Ǩ'œ>³ÎFv¸.Œ  ÊíCûÓŽ0òS_do3?ä»àë!äçLîÓ¹^×hò„ogÁP6RŠçÌu¡‡üÂ4ÊÑ;sçÜ€²HG‚Y`lƒ©Gën^®‡}@{NGþÞ§<ê-›(¯LƒÞÖzî§}¡ â¦ç¼‹œ @6÷rr¿ÓAùHÓAÙ(&ƒ²QŒ‹ú! 
»¡ ŒÜŹè?¿3j(²:« ö°© þ~îž”B9J¿còänE^¸GrSHý-[kêIÙFûs~;ù@­ÄDñ“vú¼Õð#’ûÀÆóL¾&ú±~'sMƒ¥`>´ÉF-™ûeWj8†z„ª_hmæ>лÉô‡âàë>½›å©ÜUÈ |³’2ÎGöž…ÿðWþSÂÒ–/ƒ&èJYÊýðO¾HЯ8ýqsí èÇ_í_ŒÌ^B?>NßÇ80¾„.â¾!Ì>Æø¥Ô»>ÅBç#ÜgPvÄJ·€òÛ—=±ÖqôõÉ¢‡yι‘I0ïh›À9s˜{ÇÀò%\ OæÑÞd€ûÃÆ1†À<°€óz§Ñrx|™ 7šr„y”#ÔýaÉÌ Ü§gUÿ¯ß':Þ¥¾Þæ¹WÀ}öÏ–—Ó9¡ñ_J¿h~í¡|î„•{ &®!ÅuÅÍo¬£ý.Ú¹R¶|gjèG¡âßW€Î>ôuk—€\- ä·i¥º9R9èöîŸM›NÄs(ÏhŽpûgçéωHŽŸÛþüÿ(®éøwú)Öž§,“-„Ê?›fiÅåÓžšâôU¾bâö•¯5qü¯Yqý´—¦8¾ìÅMbÏm{}I&®¤âú–r}{cùì©õ'ý[öƒ{;¬®löÿLœÁ;Øsû–xÂÓ-`x {|»Ùû¼ž=¾µØ ^e⦠5q .§ÞÕ!–ã2èZ…Í\,ögÏaƒ7„ü…ìéÆ3ˆ=4ˆ½ÛeØÒ]Ğݣ!VÚ¹ÁVõöR™RF°‡úWÞo.¥ž¯xoLÌ¢/ÐcØmEï-’óŠå§xŒ%Ÿ£—^ÆÞ2iÝïmc{<{Ìßò^ôùO öÇxRäTêõôe/ú£@×&C—l‹†±ç÷{²£Œ d‡›øSc/y¢‰/©=\Å›”M£âOvÌdou8}•Å´Ûa¹g±ßŸáŸâWÆÏ‡_ÓÈ—?< &Å» e¯Uñ/s}ì-.pXÖ"ö¬±Ç *^fÉãÔŸO>ö^OSq8_³ŽókL>ÅßtV<ÎÒÅôë›Äú|‚þ|‡=_Pñ;g3æ=âuR³Þìñ ;1ite’Ž›Ç*Ø:»ƒ¨¬fO6yp|L|È…Ô{ÈÔ× úé«ò  ,z†=kòU"_ 6RŽÒŠ+ê]Ž=à~öhW²÷¼úW÷Ä}~U“^Cûß"þg)ö‰c¹:^cOx=v´¯¨Œ½t”îèWáÛ236~©cß»Ÿ6—œx¿"e7ýöûk;±;)æ]nüyžw?η­fL|ˆÜPFn{ü/±r€ñ¹–}8ÎG‚v¿T¦W1΋ CƒìÙÓê^{9ŸS¹[Á•ì_mfÌ=Ç{p%|kßbÿÿYx·‰1TÈ{4Ø ÊÞ Ì~¹q9{BÈdMëUk™3—²þU›Ï<ôs¨ª­Oñþ¾‘rž!åÄ®`^ Àbê©Ôë3@]—C&˜ ¶€ l:]Ð+ûÌ9´­ Ýfè®´Õûs* *Ž¢{«IÛ¼ø¥öµb*;òN<^üóÏïØo^ïsáóP׬…0Év¬ãyS†¾‰ÐAYµœï|XA»Ž_*«"óÓ ;~.Ýã¥/çZ¢y&¤±NäBNoa=òæž›Y§ Ao‚·<³ó“YŸk…oáñ§f¿ò²z{è ¾+ߌLçPßlõ´[pžáZ?Nú‚ùŒ|ù<ûe?Òð5üŸÀÜEùúŽ·l»eœþkšÔ“ÚƒgÇ$®C—lU:û£Ë¤2®b s*e bÜBw¨ýzÏ%ôùm¬SÚüKíSëÃÇô]úÿЧ½8w^&2÷|À$9–ã#ƒu 6…ÏôA([̲#ô}@»Žÿþ»ty½è°þ>DÇ÷ß=€¿¡pÛÉñ¯Þe4]úÉν'˜æO™?öûÊéþ=ÀóH¿Oݨ{•n~Ÿk\÷#WmÔu»Oí:}ÐW÷šw’º@Q*Юs9ÿ;ÑÝ|{wèrÒQB·¢ÃÕ ƒ ã•ìÀW¯™çRÌ^tUžƒ-ûLÌÿâwÍ7ü±ñ7`'yÐ|# ­É|3ÀJ×tÒsÐ5üܮY;]çCtÕ¹¦üÌûxnG÷Î$½;¾{ÐéKVÆô`ËíÃj ºøý\¿‰çn*öIØ;&SÎuèÒC)ßÍ3"„w !ÌŸónÐgÁû¼Cœ‹N~;všÌSmçòN²‹9ätì`·3†z`Ÿºý+{Ò*æ± â­náY~œ²Þæ™ý#úñfæ² µ’¹ó{æÌ·¸ÀÊïLZö¸UoòŽÓÎõãí0zý+ŒÁ%¼_øÐEž2¨oF(­oHä/dn…'Ù^Æé•²ï'Ý Ú³¨Ë í\Oû/“O{Ê7,˜2Éç b[ºXG¸º¿`-l=Êz¨¸Aµ3ÙªgíÌ®3XÂZ†b†ƒ9ä‹®e ’Ψam"™ØN 悲Ásoc­bíÜÂ+/(;Å@­¨`Ýæ^îå»TôtÜCýYã"­oÝe‚Y`*Xõ:ã ”oJ ùdW#ÛÐä×P3Á¸ Ðv¬ç>úº¤ú§Sßs¬Iƒ²ÙÑw×óW°– 3Pe;X´Œu£;¨¯€õ50 ¬µ‡ ”‚b1ÈFR>N1KY¿» :ž œið{1ùÒèÐʶQÏY¡âñ e'§}Tù6eòshñ±V4•þ~Ì #€Õ^øœý÷³.6‘òÓY#M¤Ré—xø:–öŒ4X4 ⲹÜða0t§ü(Ö¸zÂßsX_jgNsÁc¬ÇÊZòaÚp ôî3ØÓb‘ÃþÙsY0'´Ö©¼ ¯ÒžÓÌúWZ;çOó^)§¢ž7ëÂÅ+ikoÖºá}ý”QHߺ8·„¾î 
YßãåCv¢ÍCèÏ©ôùhê™ÀúW õÐFÅ‘m¿†>›Î½qðh6ô¢’§¹g V:žCyFs„sØ?;ONDrü|íþçÿ3%Z:N´ŽÿÿQF4u„r íV¿ÀÿèûÇOãÿ0ý\úÿÿÿPK !ý1*¢¢ppt/media/image2.png‰PNG  IHDRÛÛ¦oèbKGDÿ‡Ì¿ pHYs  šœtIMEá ×y´53IDATxÚíÝÝy9€QÅë !¹¶¡i$jˆF’ olÀcƒ4ôÃy/öI¼Žíèðif€À¯?A“öb ØŠ­ØŠ­ØŠ-[±[±[±e+¶b+¶b+¶bËVlÅVlÅVlÙŠ­ØŠ­ØŠ­Ø²[±[±[¶b+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­ØŠ-[±[±[±e+¶b+¶b+¶lÅVlÅVlÅVlÙŠ­ØŠ­ØŠ-[±[±[±[¶b+¶b+¶bËVlÅVlÅVlÅ–­ØŠ­ØŠ­Ø²[±[±[±e+¶b+¶b+¶lÅVlÅVlÅ–­%`+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­ØŠ-[±[±[±e+¶b+¶b+¶bËV÷zï'¾ÿæ·… _r]C!ÄŽo|ÛUÙÞÑm¿Ê~ÐÒÚvEöœ–î¸çÉ·iÃb GœÛ;dÍî˜sûv÷1»ƒÍí*müî³î@s›Gkt²Í¤…;Žm6-ÜQŽ·´ºƒ]ßæÑÝlßÊháöoûVþåàöm[x°…;îñîø¶›Æn϶[iáλ'ÃíÖvûØÂynÕ¥m•±5¸SÏ-Üîl¸óÎm¬²%ÃíÏ–‡ã­ÁÄ6ÂÀv !™\{²†²­?fwâ¹…Ûm² SÚ.•O“ îìçRp'«m2¸ÃÛ.{JÁµ'k@[ƒknµ“íbpŸpnÜ lÝãèxkp§±Mp‡¶µöOº'܉·p'>—JN¡'>ONÉB_mÔ5ú]_ßnÞ´å¥TÒûkþ-Y“K§Ö[Ï<´×2§˜«l?µbì›ÁÀvm ãí…;ŽmöÎ ·­£âH¾|ä°ãÜÞ•Á­ zù?MmáV7½ø¼CK[í`zö'-m î¦;Ÿ'ßÛâ-+v$=Û–[ìÉpwÓ/ÝŸü°k èÍfvý<[ns.õÔ“{ÚýÚÙ¦ø„¸§ÇË{rñIÞìÔèûŠç6Ö¸!,3Ꞻúi2çöò±ž´epÃØ³{êù‡;dÛ®=Z›6á£{ì–wÈÙ“ã/à¥/ßÑ+ìÉñƇS!þW4¸QÕžÛ;lcæþœ¶íuNß¼kÛoPbÖòeøF mcþ:¦;y#æçR±lõÓ¿ÿšÉv½”,êí¥Ž±& Ú¢Þçö¸T_Óø>» l—s+ D´=ÚÖa‰ÔJ=à1¾˜um•÷9©Ö8eï×@kA±ÂZüpµKýáOÏ6·Ùá˜Ê¾DlEúý)ŽCû½mÕ¿Ä5n,\Ùö¥¬ÿß“¿Þ£«ÌmZ_“*+RóÑ…J/Æ{Û²[‹vãìöpà±wêAúæó“kÿ\1u2¡¾éR.Ð6yžcì\õîf)èøÚãíõ{QÄJ8éë/ãxªû³ã޶¯û~ËT´-ù¹c‰sª¼è‡ðÍÜ~~§XéÖ–¾þ:ÎèZ} ‹œÓåâ½v{ÓŸ§’Î:@§°z×îÇÜ^pc›U sûtÿŒdÙy4^ë]©d]û==lé:c K_;,nç|rÙÊÎ+K¸çñ6­í lÓR>ŸÇÛ‹îψʟڴú‡Ðn9Lÿ´zW¶/ÔXçqs²ûuf{>¸ÛŸ‘À¶î¥\æ§ÏIhÛw}.•s”â–í˜ì#çö˜Ë“Êgmƒ=9ëŽÌ´ëªÇs©‹Så»cþ]h›ooLË®uÐvqž¼ÿó?Ð6°}Ì¢£mw}»ñõIÐöi{4µsÏí®ƒ‹¶™íÑÔÎ;·Ç]më=y÷Ó)µ±Ýs´Œms»Ïà¢mmK`â¹Ýëtʦ=y‡»•Ñö`{üÄMh'›Û3‡„v¾=¹î!m7¶Çʸh;šÛº¸h»Ú“kâ¢íìx[{[VGçRÙOW6¶ãœ'Ÿ .Ú»|~òu%ÏWF;†mî¿ÛD;О|ö°AÙ1m·s{õºSíL¶×¯¾ÑÎc›÷RhDz½Æ½“—ì¶Eºh± Kîû2 Æv]÷[_²CÙfÌ.ÙálCXîy²CÚ†Þn -ÙqmÏ…½î挶ÿ®‹ÀÎi{Ìu>[õÛ‹%`+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­ØŠ-[±[±[±e+¶b+¶b+¶bËVlÅVlÅVlÙŠ­ØŠ­ØŠ­Ø²[±[±[¶b+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­Ø²[±[±[±e+¶b+¶b+¶lÅVlÅVlÅVlÙŠ­ØŠ­ØŠ-[±[±[±[¶b+¶b+¶bËVlÅVlÅVlÅ–­ØŠ­ØŠ­Ø²[±[±[¶–€­ØŠ­ØŠ­Ø²[±[±[¶b+¶b+¶b+¶lÅVlÅVlÅ–­ØŠ­ØŠ­ØŠ-[ Ü_ŸŽO^˜BIEND®B`‚PK !—t‰TTdocProps/thumbnail.jpegÿØÿàJFIF``ÿÛCÿÛCÿÀÀ"ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ 
%&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?þþ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(®Æß¾ü6[vøãÿø0Þ+5œ>$ñ—¤\Þ*pígk{sÍÚÇÿ-Þ)0 r¢¼ßLý­¿f]\¨µøëðÊ"áÈþÓñ^™¢±Ê6ã¬Í`–RP9S"bH÷FÊÄèz*†¡aªÚA¨iwÖz•…Ôk-µí…Ì7–—0ÊÉÍ»É Ñ° «Æì¤ƒW(¢Š(¢¿Ÿø)÷ü×ãfûhþÌÿðHØ#Zðw„mÚÇM¿ñŠ~<øóÃçÅþý—~Ùh¾,ñ>«ãÁ7‘ âÅøC~øóÄžð®¿pÞ„èšD~"µ{oé÷½¯‰¿à´Ù:¯ÄoÁo?à¨úOí7†®,4­sÆ^8ø/âßÙ®•{›]JûöPÓþøgÀÍ¥M-š{KjÏ[þË[./g%@?vh¯â·öOÿ‚½Á[uïø.ì£ÿ§ý³¬¾ü6À7ß´ï‡þ=ß|7ð7‡àðÿío¡é_³×ʼnÿþ.i·>$ÒüE¯xF1x;Þ*Óaøg⟠A¬Üêš®‘ã=MšÊïÁš7îÇü @ý³|û)þÑ?µoìûxüDý—üOû6þÍÿ>-Íð¾?‚Ÿ³Åß„ßSàç‡|Wñ3]mKþ§Á|Pм_âßióxFßTÒ~%¤;úœ~’ö?ÜxˆõêŠþ¿àÝÛkþ Ëÿˆñ7íO?Çïø)çŇ> ý›ì~ ˆt¿ƒÿ³_ì¦ø›ÅZ—Æ þ+lyuÿ~Ê~6Ò´»ÛáuÀh“Ãw“ÞO¬BÂh#´uŸû¥ ŠóÏŠþ ñGÄ/‡þ!ðƒ>-øóàW‰µˆôñ¥|Vøg¤ü.×|máIluk NwÑôŸ~,ü5½X´²ŸÃú´~#ð¸Ë¢êÚŒš$º'ˆSIñ•þ}ð]¯ø*ü³þ ûi蟳—€à§^!øµà_üð—ÇO k/ý“?b­'žÒ¼Mã_ˆþ—¾#¸Ñ~G£ëú…–«ðÏRÕ#×´Í+Ãö×:~³cfÚ,6ÜÝ€¢Õøÿðÿ‚ëÿÒÅ_ùÈߨóÿ›jþ{?à´?´ïü§ÿwo†þ5×?à¥úíû?üQÕfð†…ñ@ýŽÿdÞè¿m´ëÝlø#Æ>½ø=âÏì{Í_AÓu=gÂú†›âvÏZ³Ñ<@·ÙWZZÛÝ ó'í+ðOãOÆM#C‡à§íñ“öD×´(uõ›Rø[ðóögø‘¤x¾}^-,irx×Dý¡~üY½x|-6q&‘€üAðùï"×u¨¼Cq­:ø~oÿ²ÿü×þ ÁûEÁbu¿ø$­ïüj^h?j¿·maßÙÅvÒÜþÌøÅ­Ïâ;†’ü8ð¼­%øJöqi’xò)4õå»{Ýa´Ãk~þTWóõsÿòÿ‚ð,µ§üK÷! ‚Ÿø$¯ìk’vYn"ñmä¡îék;ѵ~<þÆß¶÷üWà¯üðþ …ÿýª´‹? üg¦üañqÿ„kà‡ìéá/ üdøm§þΟ¼mðßÇ>ñƒ~ x?Ǻ´~7øjš\:Æ›ucâO ëžÔ$Õôˆ§“Tþâè¯Åïø+Wn†ÿ³gíiû^~ÉðR?‹¿uOÿ1Âiÿ ‹þçýK? 
?áÿ„sþgýGµÿ·æý—ÿïòŠþpþ(~Àÿðqƒà/k¿à¾~ø‘ñMÑïï<+àoÁ3?dŸ…þñ>¯mi4ÖZ5÷Žì#ø‘?‡Q¹Hl×RoêÖö¦>â!lOÍðlGü3þ 5ûkøëþ QðËþ -ñVûÇÞ=ý”¼[û?øKðƧðÇàÏÃKá¿‹µ}cö–ðÿÅ}P_„ðT:½ñÖ~h:}ØÖ¥Ö£Ó§Ð¤<–‹{¨5èõ¡EPEPEP_‹ÿ·GüwÄ>ñ^«ðöu}_ˆI§Ý¦±ã-Wuݾ$/­äÚ}¬>nÛ}>öêËJ}FKk¹µ bêM;H‚Ò+CÄÚOß?¶ÆåøðSÄ> ³¹þ%Ö£—@ðÖÙLSÅ}yž}ô,™–9--ÁK[”I×SºÓ¤ ëù,Ó4ëé|aãÿk+pÚŽ¿ªiV¶“K1t:&Ÿ¡ØÜ,Ö,V×6§P×5MrçP†XRî8–cl·|×â?ü@ñ÷‚ ‡âV­¯x‡âWÅÿÛk^ñ½{?ÛUo XZë7÷RüðÙÝÅ&¬–’cÌ– BMIV{{׸?liz§¯Ü\G£ØË"ü.ñ׆ü¯hQêZ—Ù¼5¨I ü¼ýšþ0ü)ðÖ±¯‡5¨< *xÎÒÏQ‚hŸZñ¹ÖUo,å¾·´þÂ?`?ø-ü³þ C£è«û=|ðÕ§ÄýJÎÖ]Göø›sið÷㎉¨¾›¥ê:ž—‚u»´ÿ„Ö-MZßMÔ|OðÏPñ¿‚_SŽæÒÃÄׯo.ߨ>$øö-ÿ‚…ü1ø¥ðSâŸÀŸÚ¿áŸ…|}®|,ø³àû}wÂß-~ü[ðD–âxŠãÚæ©ðÛâׂg¼±“PÓ>ÛáïøR{»Y$]:[ˆ™ÿ/Û§þ °øcâg×|iÿöý¢u_†Z“Ŭj_?hn¼eà›‹Ö†Ñô}ß4bñŸ„ô¨&†ú%ñg†~'êW?n³yu‹eÓ§m@úý£?à˜~"ø¡ÿšÿ‚xÿÁQ<¨|<Ñ´ÏÙ»áíð³ö‡°×uŸYxëÆzŠþ|LðŸÁà MðÞ¯á^ïÃÞ*ø¿ãi|câwÁº„^“KŽÒëÅMÓt3éÏø+ü¢Ëþ Yÿfûdë:üF¯ãëþcÿaý¿cø(§üsþ s­x§Æóßø¶÷á§‚õÿˆþ)µñÏÄ?„¿uþOèãâeα¨^ø÷ágÄ}&ëMµðl7ú®½w£ŸxB/ Kcá³&mý‚ÿÁX¿å_ðRÏû0Û#ÿY×â5 _ðcüåþì›ÿ~â¿¿ÊþÿàÆ?ùÊ/ýÙ7þýÅ”Wù‚Áêßò”߀ö` ?õ¢¿jªÿOºÿ0Oø=[þR›ðþÌágþ´WíU@é±âïxSÀ>Ö¼eãèžðŸ‡4ë½__ñ'ˆõ;=DÑô»$º¼¿Ôu+ù ´´µ¶·ŠI¥–iUU‰(|øEáß‹Ÿ /|]ð3á%§Á¯‹?|kã fßÇö^?Ò¾'ÝÿÂ]mwâ½CÂÚ—Ãý7P¶Oì9¼¥i÷ZÞ—¨Gâ¿ø5gþyâk:6‹û"ë~Õµ-:òÏOñw‡?hïÚvÿ\ðõÝżÛêÚe—Œ~1ø§ÃW–:]ÛÁ­øUÓ¥–ŽòÊæÝ¤…ùø6ö:øÉûþÈŸ¶_ìãñ§Á¾:ðÎ­àŸø)í¥ø;Ä>6ðˆ¾[ü\ø{ῆ?< áÓ¼CþÛøsãëŸj·Þñ.…¨ëÞÔ’Þî 3_Õ>Å<ôý&×ù‚Á4ÿårOˆßöÿðV/ýA¿lŠÿOºÿ(_Ùßö\øûiÁÕÿ´ìÑûKøþOÁ/‰_·ÿüïþ_ÂMãmÂoûTxÿßñQøÄ>ñnýâß è:·üJuë¶}ƒìÿjÓ.¯,î?Ñ£þ ¡û{x?þ Åûü}ý¦µ}{ÁVž=ð€5Ë‚>ñªêZ†ãߚ͜ºGÂï ÝxsÃúŽ•â}{C¼ñÞ•7‹bе&{ [kš¥Î½áë­nÃöbøkð“öæøkÿÃÿ‚œütøUái¿k þÊ~ø³ðóÆ>Õ¼á½áƵû[~Ïþ—ã&‡ øY$Õ4 éÿ?á=¾ð¥œò^hº¥¶½u¬ÝþÿÁSÿàÖ?ø'd_°ßíãOØ7ösñ¯ÃÚ{ᇀ5¿‰¿ l|ñ ã÷ƽKâ]ïm%ñ§ðªÛáç¾#øæ]wZø…¢Øjðœ^µ‡Äpø¾û@žÂÛ[T¸ðî±ýÿÁ2¼'â¯Á6ÿàŸ>ñ׆|Aà¿x/ö ý”<'ãx³FÔ|9⯠ø«ÃŸ¼£ø‡Ã^%ðö±mg«è^ е{;½/YѵKK]GKÔmnlo­ ¹‚X”Ÿÿ‚±Ê,¿à¥Ÿö`¶Gþ³¯Äjþ@¿àÆ?ùÊ/ýÙ7þýÅ_¿ðV/ùE—ü³þÌöÈÿÖuø_Àü•ûÿÃqÿÃyÿÆj~ßÿ±ÿü*ÿøeÏù1Ú;þÿþü&¿ðÑ_òTâñoü%¿ð‰Â%ÿOüƒÿ°á&ñwü}ÿm£y_ðPoø)Gƒÿ`߈_°G»¿éŸ~ ~Ý¿¶oÂoÙgÃÞ“â¯õŸø;Ǻݟ‡4Ð&kHõ©~~É_³×ÁOÿ´wíð»áÍ·„¾2þÖ׿ 5ÚÅÖž ñmì_¯þøXð¿ÃûÇðÆ­¯ßøCÂ÷:>¯ë0ÞÍà½Ã’x’óPŸWñCkZ¹[åþ&ÿà£ßðG/þßðRø"×í[eûaþյπ,]|aø±àŸj<#ã¯Aáߦƒ¢Y_xSÄZ„¼pÚ­”ö:UÞ“«hZ|ÖË­A­Ýûð Š( Š( Š( ç+þ •ñ£Uð×ü"¾Ñõ,æ±Òm 
Œ°"<–zþ»¨=Í÷˜].¼3§…ä_2«h'¶d•CÍß =ljíxÿHø©ðóÁ¿t, 3ÆÓµÈ`ó’áì&»Mö•q4@FךEø¹Ó/B€òÒtÀ+ü~êÚ>¥¡^ VÛì—‹JöæX%tIA1ùžD²¬lÊ7ˆÜ¬›nÇFoÜ¿ø%ÅèõxËྩ¨Ô¼#©¿‹|/g3Ÿ7þuá‡Y·´@¡~Ë¥ø‹Ó’Í'Ú|OÏÉ´(òåÿÿ‚®x'ö7ÿ‚êÿÁZc_ÚÅö^ø#ûWÁDj½Wá÷‹µýBÏNð¿€ÿh-öƒø¢X[kWsÚÆº~™ñoÃÒXxbã\¿ÕRÃKñ…<fö‘Zkz®­§ÿ‘È’Æ’Äé$R"É‘°xäÀdtu%]Hee%XA ׿—Š?àŽ_ðLÏøGö„ðOŒÿd/†¾/ÑÿjO^;ý¢þ3Þx²ãÅ~(ñN±ñ»âD…¿Š¾"xKÆÞ ñ¥ã…ZÔ©«êçDƒáF»à­'ÂRjú¼¾Ó´I5]A®~Wð×üà'€õØ|6ý¹à­? ~xgLƒDÒeo‡ÿðP¯‹žýŸmtKM.]*ËF´Ò­boˆú^•c²kM?Eø—¦ZÁý•§Ú¬_Ù¢îÆìñûöÇý™´ŸÛÛþÉý“¯>Ãiâo~¿¾|_ý¬¼YáË úoÃþ+|aø›ðÓž)Õ´ó=´~5×µ-SàÅ…¿…/'·Õï<:5{†³m/úĶßÒ·üšX¡ÿ‚XÁJÞi#‰ì ûaÄGTS,ÿ³×Ä8 Œ3 “M$pÄ€î’WHÐeÝ¿fOÙ#örýüsðÛöløSá¿…þÕ5y|Iây´±}©ø£Çž-¹¶·´¿ñ·Äk÷š·Œþ#xãT‚ÖÝu_øß^×¼I©yHo5)¶Œywímÿìý•ÿnx¦ÓÿiÝã<1wá›Oj4Ú³ö®øMð›]Ñlu}C\€ø—áÁßžø_âmeµFF¹ñ?ˆ<#©xšúÎÏDÒïu{3úžšübÿÁþ&ðþŸ®ÿÁK|1}­i¶~!ñì}©èz5ÍÜ0ê:µ†qûOÚëWZ}´Ž²]Å¥Üx‡CŠðÂÂú¥žàÊkûÝø“ñGá¿ÁßxƒâÅxKáǼ+¢ê¾"ñ‹×õïý¯ZñŸ‹õ-_Ä:¥¤úÜ>·¿ÔnDðÍ® Ùtý2ÖÞ/àÇþ íã_øCþøâx—DÐ<7sÿÿ‚žikÚ®¥kg¤K©xÏEý¬ü-á;4Ô&‘mZoø]Ñ´m$y¸¼Ô5;+xK<éŸôB×?àŸ²Þ³ð[à·ìïi¥üoð7ÁŸÙûÁ¯ðûᇾþןµïÀû+/;FÒaѼ_¨üøéà]sâšiúv…ek£Þ|SÕø›Eñ¾ |T×®m,•m®g•|£x·NÖ¬­müY£\Íö¯üBãÿ(ÿ£ÿÍ™ý°ÿú «¯ø{ÿ×ÁþøûÀÿüûÿaxçáÇ‹ü5ãßkŸðÑµŽ©ýâÏëV^!ðî«ý™¬üvÔt}Gû;XÓ¬ï>êé÷Úmß“ö{ë;›Y%À>Ìÿ‚³K?ðKø)[Í$q!ý?l8ƒHêŠeŸözø‡†b’i¤ŽÒJéì ÿÿðc-Õ²\ÿÁOìžâ¼¸ƒö.º‚Ñ¥nf¶´“ö®Šîâ( d‚Ö[Û(î%Dhá’îÕ$ekˆƒÿ`Ÿ´÷üöDý²¯|c7í!¤þÐô?A¦Zø«áÌ?¶¿í±àŸ‚Ú…®“¦é]­µ¿ÀŸ‡¿´/…> èðM…¦ßjpèžÓ£Öõøî|O¬­ÿˆõGU»ù3áGü·ÿ‹ø â–ñÏÀßÙÏâgÁ¾›>ŒÞ0øQûjþÝŸ¼RÚEÕÍíÖ”Þ ð‡í/£êÇM¹¼Ó´û¹ìMÙµšæÆÎy"im`dý²¢¹ÿ økNð_…|5àíçÄšG„ü?£xgK¼ñg‹åÒl.uIö;#ZÆya%Ó£À Ç3‡Ya¾¾ž]š,Óˆí´Í+L·y|¸Qb‰Uy  ËÍ4„"( ,Ó:€ØgÀ>øçÂÐnm5;›«mFçP’æá¾Ã$ÈñùQÅnËl²³EFX¬â6Y¦›ËR‡qæ¾$üGoÉ›¥‰íô;YL‡Ìù%Ô§RV9åŒR×-oÛŸÍ™VQBçšî¯s¯êú†±vOŸró•-¼C º¶ÕÝ´ ¼d¨>\K‘šØð7Ò]ë7×S¾Ê’À„>Þs#G>¯k£”]#AI#Vñ¡q‹iº°JòûOƒWº¶’{;´Gcê÷Á¿ø)—Å-kIÖ ñ炼¨M¤Û[ˆ\’‰ h.¼C§ê3DaÎT̉$‹3*Ú´×íkypÏ$Ÿdhç_i ’ ¶ÚN›kÆ8‘ $’\äŠýç¶´µ³ŒCgmoié´1ÁÉ$á"TQÉ'Ô“Ôš±@…þÕ_´îŽæÕÞÝàð³hÔO?Š<â-C*É;¥ö…lK3J-Þ5Lƒˆ„¯Dðßü?ÄvñIá¯x€†A4ÞÖ/´¸[1ldHî.üFÐH×'z‰%”¤_èì'úE}ñªüø+¬ÄÑ^|2ðŒ*ÝN•¤Ã Ëü?vmû:eû£•÷›>5âŸØKövñD"ðÖ££¨;”éú¡¿*ù$ºÿÂMoâŒÀ•-GÚNÖV;¨…Ò?oŸ ÌOö÷ÃÍwMþÈÖ´ýlã-’E姇ð@ “’Ì26ýŸíÑð‚á‚\èþ=°àfIôhA,´ñó)Þsà0m¡¼{]ÿ‚oxe®RëÂ?5Ý-Ñ’ÚÂîÒêXHþ”éZÞ•§RÙÐåÜêŽ׌k°Oǽ+é´xwÄûŸuœóÛ›„ Í•Xï4 
Ç…Ý÷'×eÛ YYËîûö×ö¿øp¹—ÆvGŸ–ëÂþ(véÿZEÚüÝFôù¶œ ìtßÚ+à†ªµø•á˜b£ûJê]ä “%ux,X.ÒrÌaŒ€¨ü]ñ7ìåûKxePjµ›Ã30¼¾ð¼÷—«l)Y ƒN‡ÆztJÂk«x `&F#Ä5¼g¡ý®ÓR‡Ä]ÞŸ6ÉÛ]𵦱$ª¬û—ìž½³½·p6ù¯q§Â± ù’72@?£ëŠ_ µGX´ßˆž¿•š4X¬üY ÜL^bDIåE~Ò $ „B¡Ø‚$]µõ•ꇳ¼µ»F Uí®"X+lb'pB·ÊÄå<ñ_ËÕÏ5k‘„ð¦«pÑÅ$vÛW>ÕnD›Wjé:壽¬›‰q ×!@cUiT­C'ÅÛ-§XðÇŠ4ÄóLÜIe –±É¸¶vž™v‘!1Ì„–PÊHõ+E3:?Ç} . þÏñ®¯¤ÜF˜‚VŸXÓ|…hÌm]&È ÄdÄÃÎD(J)e$Wwiñ«T¿*Ïâ¶©q/?ºÆ·ÆR2V3¨‡eeR¹8Îhú+¢¿Ÿø‹ãW SÇ^)pƒ.WÄú³d"øírqÇ=+÷X¾¿ q¨ê—w«$†á¦½½šä<³Lí$ò¸i%g$ÊX³—?1-ÈôEq¨XZ+½Õí¥²G·Ìk‹˜aX÷«½¤u ¸²…ÜFK9"–ÒúÊý ¶7–·±«mi-."¸En»KÂΠÍ4ºÏŽ|) «ÿhëVk2`KyÝæXàfÚÛÍ•SºEEd°¯>ãçŠmõ­2/„Wšÿ‡|HÚ¯Ù5ûk£atŸfº‚çgÙí¤ž;6à@ÃSƒRYͦ‰¡½²–eDþ©h®#ៈïsý£©\ùlä“@?±üßQµkOütmOÖ·Oö¹t딎ß∡™ ›iáw»Oi>`ðO%™¯­oLÙ—–ßп|࿆~²ð—€|5¤øWúz¨·Ó4‹U·äÇÞ^Mó\ê:ÂĆóSÔ&ºÔ/d_:îæyIsØQ@ÿÿjOÛ[Jý¯ÛàÇ‚.f¿…‹âïÀß ü;øwû@ü#ø§qâoÚçàçˆÂÚïÇ¿~ϵ6ñïáçÁm?â÷à IñfK?Ù~o…þ)ivÄnð7Äo7ÈõßÛ?öþð–—ñÓâú¿ì£ãŸ~Ï_·‰~øÇÁšìññƒÂž6×fÏ‡ß t|CáÏÝþÖž0Ò—ã™à»ëÕÑL¾ ¼ð浨è$~ŠmvÒÃNû ]ÿ‚vh¾$ø±ñÇZÇíSûYÞ|,ø§ñ«À³‹§Nxv©º˜ð÷àðuhººQÅÔͱ¼/ŠtêQU–‹­Ìêaq8ÌGŸ|Mý°iÝWö¤øûð»àv½û=è_þ~Å~*øåáxûáÄ?‰Þ$ñ—ÅŸë×ú%ãÿjxWöƒøU ÿ®ÑïVßC½Ñítqâ}OÄ/Š-“ÅZ-¼63H¿²ŸíƒûT\øËö9ð¯íc/Àhß·§ìõsñ‹àç¾|5ø‰ðno†ß<1ð÷Âß¼[ðoâ¾ üiý dñ†›©xÅWׄ~.h>4ð| ¨ø+^ðÞ¿ðæÊmoÃz­Çi¤Á-< á^úóáçí;ûU|?Ðfý•ïd]#ÁºU÷ìéâÃõ;«cRñ-޽ñ?öpñ÷Ä]sâ6£âÍCXñ•ïˆ|qãŸé×:öµº h §èv>—û2ÿÁ?üû9ëþñn¹ñ³ã×í'⿃¢ø ð+]øýwðn/øR_ä‡ÃPkžððàÇÀÏÏ}âÈ|à¨4Ò~:|[ÿ„_ƾ ÓüUñ¢/i¼%ã_ I=®Ÿ¬ü!øgðÊïÂ,ÓBÐ>1x»]_iú«ãßì7«ükøù¡~Ñ>ý²¿jïÙÛÆ~øAâ_‚ZN•ðBÃöIºðݯƒ|kâOx«ÆSÇÿ ËöRø×âxüAâ=cÁþ–}^ÄÚ2xbÅ|-‚÷þ “Yñ{Ïø$ÇÃËχßþŸÚ—ö·‡Ãÿþ~ÊÒé5OÙÆoxOýŽuÍ/Ä¿ |_àý~÷ök»Ô¯|s«köš¾­ã­SâÞ<ҵ˿kÇNÐô8íü5‡¢ σ§QÔ¥ŒÃTÅNµX:nXš‹3\Ë QÊ^Ò”èQáÇ–e±ÂW¢éâkÊT±Jž WûCjŠÚ§MÒ– S/J=¬a‡Âà ÊpÚ\´”+Ƶlñæy•LNª«G I¼4ªâñXzXN§\ÿ‚ø+Â^ñŠ~ÍÖµ?ß³¿íðjŒÐjRãüKñ‡ìQàψ¾ ›Ãßo¾øºÖÿàŸˆ?l;ŽWÂßMñö¯ªþÍ_ ~;xz÷á·‰|;â k:ûÞϧÙún¯ÿØðþ¹àM{@Ô¿jŸÚ¢çâg¾>ü"ý¢>#þÑ“7ìÉqñâŠ~O¡]|"ðž­£Ý~ÌÓü ðÏïÝxS—vøkð[À²_^è“_k:ž¥{â¿Üx²ßíKÿÛðGíKâωÞ(Ô?hOÚCàí¿Æïƒþ ø+ñ—Ãî¾â?…¾ë^=ñËíJûâÏÀ?‹/ðÖ¯àß|Fñ­iÃßx7Ã~!•à³ñ¿‡|U¥µõ…öÔå,<ëAV¨³:3ÄR£ÍË©pþXçET«8V¤ñ9üóuZ´UjôéÑÁ,=:xiUœæ¬g(b~ÎÙ•a†–!&ÖcS<®£Z¯²R‹ú¾J°òÃÑŒU ·«M_­Î2¡ÏkÿðS‹-'Åßü¡þÃß·—Ä 
ÙÇÆ?|ñ]ð/ÃoƒŸ…¤Ö~|,Ò~4éá½{Søÿ£Zë6¾k6ZÇ×f¹:ÕΙà/Úøâ'ˆü)á={οi¿ø)'Žô/ÙãÇÆo€¿³?Ç‹w¾ýžµ/øÊúûöXÔ¼3w{ûPøwú¾‹®ø|ŸÚ?PÒuíSàůŒ|<þ6ðþ¾4í7Vñ±áfðœÞ8øu¬ÝxÒ×놳wŽÿfë_ÚsÆß>)xïö•ø™ñçÆ|T‡Áß´¯Š>ü1ø}iñOðo‡ü†Ÿã_ÙûöVµñ'„|7«xOÂ>Ñõ[½[Àÿ®íÃ:†›¤SRñÞµæÓÿÁ:<ªþð~Å>9ñÏÁ 꺎âoëŸ/<+{©iZÌ~;¶ø“7„ü 7Çÿ|k°Òþèݦ›á/hžƒ}{áO†>ð¿‚ô[Í/MÒ …9êA¼%J.u*âOÔl;tsyÓÄKˆj©Tö.xZ؉*xz˜^ZÙ~öyãs<>.–ª_¾H¨S„ó æT©Wƒ©åêXYå\’¥*²†# 2¥:xØU§™O5ÄÕö˜jyB¥WšøGûuhñ>üCð·íâ©aøƒáïÙëÅ?µoÆß ~Ì–>‹öŸñw„mþ"éß> Oû=øƒÂz;øòÂÃ\Ò¼ /Ä?„ÿ%ý™&ñïöû?Œš¿Ä ‰í/=öeý´ü-ûgj𗇡ý“¿iÿ‡ž°´øm{ãOŸ >Y|6üø±wðsâÂèu¿ üOø‰÷ŒôŸÚO©i—v¶3øÆ>·Õux×Äw^ñ^¡Mcÿüð%¯Æ{‹w¿ÿh]kD·ø›£|{Õ>j>&øyÁ~ÑÚ/€m~ÃñßXÓôo…úOÄ]iV6šõçÃüEÐÿg—ñõ¼_ ø/oã!ý´}—öYýœ!ý–¾j ->/üWøÑg©|Eøñ/þ_Œ0|$‡ÄÖ:¿ÅOêž?ñv—gÿ oá?Áï kþ#ñ”WÞ½Õlf×nôØõc¡YèºV•ÙNQšö˜¥Oë•*W§†Œ©`ªW©€ÀÒœ0z:Ô¨ÑÏkg˜¼ *´¡í8k ñuégO>¡Šçœ%:xYTT•l)TÅ¸ÖÆÇ KšÔ©*Î2ö5j×ÊipöWžr†‰â Øu2••Ô£­âoÙƒà/‹þÕøk Bv²ìeºÐ#þömt[‹)f¶•:|¹™üOÿÔø#©-̾ÕüUà»É÷yb¹‚M:%99,ôÄÐnï#L“¼Ôæ‘ 6%Ú̧ôNŠÀØü-ñïüëã%¥ãOà?ˆñŽž8 ãM>]'^dÚ–×k¢kñ܃óyu;Wˆ£™2çäo~Å´Ç‚#’Mgà&³ªX¬æ½øÿ …ÜêŽqsž…«ø¢úÚÞ`¹gÔ4 FFô·“‘ýEÑ@Lj|+}á{±§x«Ã4ð]þý—Ä:TÑ]ɱ™\ -FËóD¶«>é°Á@Hž6ú[ü°êWýw_iÞDA{+ÍJ}Äcn-Š“Ì€n?Ù啞£m-ž¡ik}i:”šÖòÞ+«i‚ ËèñH¤ ºA Šð­söTý›}7Â>‡w#\Êo4Km:ì\1‘œ\ „é)$‹*«€å6ßKÑÈg¼ñ5”h#.#²Óµ‹»¦qŒD±ÜÙiÖ¹n~v½T8$høàmsâ§Ä-á¯Ã=P¹¼ñݬ~"ñâ·Ÿ¥x^ mröi,ÅáÝ)!óRiþ×quw4¶v6÷_l¸µ·èCþ?öJÿ¢1¢àëÅßüÐ×Ðþ øwà?†ÚYÑ|àï ø;Kvç´ðæc¥%ÜÑÆ"[›÷´†)oîü°¯/džêAþ²f$šßÑt‹èÚNƒ¥Ãö}3DÓ,4: ³ym¬VVîbY¼»xcMÌI;rI5§EQEQEQEøAûn|!ø•ñgþ CðïÀ~ñÆ[x—ö6ñ¿Œ¾*[øoþ oûkþÄžøooጼ!ƇÞýŸìükð÷Çü'áÏkf×Áþ4ð÷üâéÞãÇ+¸]6ÌCíjÚ3Aø¿ðÇáŸÁŠŸ³µÏ€j¿Ù×Â^(ý†|añ7à¯ÄŸk<ø© ø¯áÝÇÄë/ˆšþûL|/içà?‹ãƒ£xoÁ~ ñ+ø~ÓÇÞ0¼Ôõ;„:ׇüeö_ÆØGöý¡ü`~!üýe?Ž^?m.ÇDoüaýž>|Lñƒhºa´Ý ø›Æ¾Öõ£¥éísrll ïÙ- Äæ£2É»èM7Àþ Ѭ<¥i𾕥ü;³·Óþéºo‡ô›XZhrøbÒÇÁÖv¶‘[øfÎ×ÃSÏáë{m;(aÐæ—IÂG·/l6BµêO˜æXª‘‹ýÖ>–c˜q>*Ø®k/kÃç9m<•ܵò´ñ“Ç`#€Ë°+ÍSŠÄA¥ øl(9¤êeøŒ†¨AP„ÕjUp¸Ü^I‹«šPŸ³†#˜VÃáiàqØŒfk_ñÛã/íõûMøö×ð·Ãßé>øû,EûRü ý“¾,ë·í|}àŠ?ü)áMFïÁÚWÇ_öë“Æ>6ñÎ…Ž<'ñ0ZxcöÔ¾Â«Üøûã§Ž<=¯ê¶^gû øïÆ¿¾<øƒörø+¤üø}ðƒãWíËÿ5ÑŽ—7ÂÝX·„|ðÿà ø…àKï Gá?ˆžðÄ~žêoj1ð/ü#cZñU¥µÝöãO Ée¨Ý\~²xçö*ýþ'üC¾ø»ñ+öJý™¾!üXÕ!ðý¾§ñ?Ç?¾x³â£oá;íTð¬Þ5×ü+¨x’î j^Ð5Åq©Èš5ö‡£ÝéËmq¦YI­û~Áúÿ†uï뿱7ì­x;Å_æø»â 
êß³wÁ½KÃ>$ø¯qgy§\|N×´ÏM¥ëçÓõBÆoj·$–ÎúòÕõ&‚êt~|5)S‡=Ysâç•gEJД¢ã†Íñ¼ŒšŒà©Î¬0Õøkˆ1X(ÕÿlÂÿlåØOíõò˜ç%{j”£Jnކ3$ÇS£*qªÝ|£Æ8IJjrÖ®.Ž}’᱕¹Ý<_ö^7S æSË¡ø›â¿ø+wí¡ið7áŸÆ¿‡ºGÁ/‰³ø/àï‚~+þ×>ðïìíy§øš?о:|Dømá­kMøÍã¿ø(—€¼Aà;_‹úÿYü=ð§ÃïÙÿöÍñ7„¯ñDðç‹|+k7¡è_n;/‰Ÿu|Yø1ñGàå—ü'áìú|+okûM|!ø“ð‹Ãþ!¾ý¬|3yáx?öžñ6‰ñÁ©aâ{ þÏÞ#ðïÃÿ‡^"×µïüMñ¡âê-ðCUýY¼ÿ‚pÿÁ<µ}ÏPýƒcûOéÞ(Ñü?kyû.üº·Ð´ŸÜj×~4Òôh'ð3ǦiÞ/»×õÛŸYY,ÚýƵ«MªÅw&£xÓXñOü³þ ùãŸÅñÆ¿°·ìsãˆ'‡cƒÇ^)ý˜þ xƒÆ0Çá 3MÑ|&‘xŸVðEÞ·x_GÑ´'ê—ÁtM3JÓl4Ñmkck]9:¼&*ºUÕ9W¥u VöÜOÃö&1¯J£FÁpþ+'ËÕ,4'–lTÌp•!_F^|m˜œ6+ F³Ãª”)à %iìjÑáŒÿ"…JœÏš´ªãóÊYÎ.RŸ..9U ³J¼1˜œd?3¼-ûzÁD4‡^ø£ñÃÿ±ÿÄã?þßZ'…¾ü8ð·ÄŸ„š•ßÅØâÿâ4ñV©ñoâ‡Ç_øCBð?ÄøþêxƒÀz§„íçø~÷öšŠühñ-¤wPAêþ+~Ù_´‡ìëûnë_´f³ƒð³Høs©èßüaðëöSý²àš_uÍZÛá,>2ñ—‹Æ•ñö—ñ_Ç/ÚxOÅÚˆðO‡µ¯éþ‹\½ðÞ¥â?xÇ\Ò5Óôÿ®¼kÿÔýˆ|Oðꇾýšþü›ÃÚ_m>|@øðWà×€¾"| Ö>%ÚGkã?|Ö“áö£¦øÄþ"ò-_ÄsC¢^i1ŠÕ4Ïèþ%Ðæ»Òî|ƒö[ÿ‚C~É_²õ·ÅÍ>ß@Ñ~.h_|ið÷⃼oû=~Ä¿ þø“Áv÷òjsh^-øQû$~Ê¿³?Âÿ‰âæõ•­µoŒ>ø‰¯ønѵ=+Áš·†´xÂÃÄ$ð׋ÝOÂVWE—‡|I«}‹RšÛîŒðP¿Û¹¿hÚ[ÂßþøMø û:xÛUøâ_‰_|7ðŸTÒtˆrüÑþ%øoâ6·¬Kÿ ø7ñPÒµøßÁ&…ðÃ?²6¡©üDðä§]ð¿íc¨x…4_ þƒi¿ðLø&ΣøÃÚ?üÛöÒ´ÛiÖ^.ÐôßÙ;à-Žâ›=T¶×4‹Oé–¾ŠË\¶Òõ«+=cNƒS‚ê+RÒÛP¶H®àŠeôÏþÆ?±ïį'Ž>#~ʳWükÃ럄±ø¿Æ¿¾ø«Åü*¼ÒõMóášxƒ]ðµþ¬Ÿ®ôMsZÑî|·cÓézÆ©§Ë¦µ¦¡w ݹ¬§ŽÄãqXY¼kÑÌMFXJ¬V/ÚáñRåTå%K,¦²J”(,4)¼RÎ2ÙeÿØÙ>QK‡,£õ*8z8«c§Me¯œ§Uz84)T¦¹åZ1u±ŽY’«^X©TXìüÂ9ö®i˜Tü§ðgísÿ?ñ·†¼+©¿‰?`¿ ÜüEý|;ûti—«ðö„ñ¤L:~›&¹ðvûD?´ÿdø²<]'ˆ4ÝCIø¥oâ/ƒ_ð­ÓK¾Ñ/>|UmZßÄ:EÙŸþ iûgþÓ|wáŸÙ»ÃšìɳðWá§ÅïjçàÕŒžñ¿ÆÙóáïÅÛ_iß]ø[ö~øMáû¯†Z¿Ž’¼oªü>¸Ò|%i/ƒ5/ÅooН¼8úmψRSW–ñb@¼ _ðL_ø&ÄÛÃü×ö†+;}zÒÒ(¿d߀±Çkiâ«9´ïZÛ¢x,þ$Óî.,5èb ±g<ÖºŠÜA+£c…”°õ±5ä•g^Y$áJjÆ*|CW«ÒääÄÃ4Äg´ªÕ8á#ƒ§–ÑÁa2ú”°˜=±4§^:Q¬éN<Ú•:ñ¦”ÿáR\? 
u`¹ù©K/Ãduéaá:¸Ÿo[5©ŒÅÔ«‰ÂN¦að‡ÅÿÚ_þ ½e/íŸãƒ7°­ïÃ?ØçãÏŠôcFñÀßÚ?ÆŸ|yð?Mø)ðÃã¶ÿ ø¿´/…ô[ŸŒ>Ð*øƒA×>*øçöSøgðOáo>%A¢ÝÝËu£ø»Ä2ü.ñ]®¹a©k:5ÇŠì­¬>$xrÓYÔu‡þ8ðg‰^ß^¶ô‰ÿ³'ìÝñ»Â~ðƯÙÿà§Æ/|>¿Ó5oxGâ·ÂßüFð׃5mM—FÒ5o hž2Ðõ­;AÕtÍ"yôËKL·µ½µ°žkHgH%‘£pþÍU9«SËñ ë*´èÎxÿìœVI˜q6.´kGNRÎ+eJŽ]J´çG ny‡Åa*á1xfkZU&±þÍû˜ê ²Ó¬?jÁûLëž3ðÿÆ?ß´ßíùsð{â—Ãß„ÞÕ>hŸ?i?¾.øOñCÅŸ n ðÖ·ã_ü8ñÏ‚íµëýkYñzø/ž ÐtØ–¾oð_íÛÿø¹àŸ„Ú÷Ãí{ö#Ñ&ñ·ì1ñãö™ñˆ ~Ϭ<¥xoš'í?¢µ×~)hž#п³E÷W[øg¨XjšäÚŸÅKMFÏÃzí?ÆÙßöý£ü3¦x+ö‡øðwãσt]fßÄz?„¾3ü2ðWÅ é>!´³¼Ó­uÝ3AñƉ®iVͶŸ¨êú¥­¬WÐÙß^ZÇ:Áu:?‘éŸðOØE7£~IJ’Ú‡ƒüSðòý´ÏÙ«àžðŽou=GÆÞ¼6¾ ˆÜø?Æ:†µ¬_ø§Ã3oÑ|C{«jwZµ•Ü÷÷O/.*8Êóœéâ^“ ˜RÂFÏ£Wû2ÁäS«,K©R¥,³8ÇÐÌ*Ò¯7üNøñö.ø)à?ƒzG‚¼ ãŸügðÅO|XoüQý˜< ñƒGñ߆üqàïŽÿ to ½Ïþ0ø/Á >ë¾Õ>(AáíCVµøÉá _ÅšV…á¯øSûa~×_þ |3ø¯ñÓÄ~;ÿgÁ"dý¥>|EÓ¼gûDü*ÓµýZ GörÓ—íàŸü_øÓðÊê_ IãX¼Kã¿Ú‚Ükÿ|]¦_x¥´o |Ñ4­cHøûi þÅ¿±Ï…|càˆžý“f ü@øSàøþ|.ñÖƒð#ánã†Þ‹OÖ´ˆ¼ àéþ·Öüàøô¯x‹LÃ>¾Ó´TÓõíjÉl…¶©}ü÷Ã_ø'÷ìðcÄ·^3ø=ûþÈß ‡Vÿ~!øÛá7Åß…:‡ŠtjúÇÄ?‰¾ñ÷€.¼'©YYø:ïâq:çíùûqø/Ä~ð§‹&ýœïtŸ†?´/ÄO…´ÏÆ_~Îý³¾økâ$0j-þü7ÓuÜ~¨ZþÃÿ±eÃÏ |"²ýeÛ?…ñÌ<ðÆ×àÂ{‡žø—å|Cð·‚âð’xoÃþ9í_‹t6Ó_O>mº€ó_v?ðOOØÛǶŸmÿaÏØúŠ;Ÿâ•Ĉf‚ñxöËâmÖ¯kâ ŸˆÖž0O/ˆm¼wq¯XÙksø¾Ep=ðÉhQƒáÁÍîújk+ë„:ð¥w.‰ƒ´¯x N!Ø !ߟ„â~e¬Ð±7§xˆWwDƒãOñ5!Šq—\ôî+z3޲­éÏ*,Cœ˜f’Ö¿N³_™öÞǤ]4)žÃ­—*9;YƒßݺØweÆ`Ó=ÌHFaSv ,Z’®1&˜Ñõ5$«é{E7ÈÐòÀ/Vcû<%{g¼ê7 ->‘5OÂY#g«_òzäS 0@»-š1?2¶)a¸  ®Ë ƒYJKÈš¶…MÃ6yQPœüÆ(F~žÂÌØZùx”~ xhóîÀX q·ï`–§,7)YÑдéâ‘f `^õ'î½ãýCü5?ÅØp/†³o»&ùÿÄI?s‰uÞ~,ßÿ’;·û ÿÿPK!Øý¬¶ppt/tableStyles.xml ÌI‚0@Ὁwhþ}-CQ$ +wê*”!é@h£ãÝeùò’/Í?J¢—Xìd4ÿàº5ݤ{ƒc@ÖqÝqi´`° y¶ß¥ß½w猧ûF“­ô ¬©hÑÍ)‘†[¡L]ÑÏåGgH fÓÖÈŠ$ÐéäùiìÊ­’»¹'` d]‡àÊ,¾– ƒ®uÒ`ne}ú¾Î„g;ntÖËóAÖ0eè©ß?ÒoW+Åå»å›Fš@¼Ô, yX+g4÷šó¦í¾¥¤„/TWQÐb¹Þ4߆)#t‚ÂM”Ôºs}Ä ÖK1“«@àˆk|ôrš]ç–Öµ©Q0hSÙÐJÈ8%Áò…W^2É–ùgOQ´d :“1+aOâ{/”üæí î„qø©Ï•Ö«Z²¯h§(ú”Ð û‘>–ñ ƒzƒôf¢¬Ö&ØŠ Ä][¤ÄY¨h¯HòÎ%)8ž5_@"ø•ÂHéV¿±AÂRîÛSœVraóWwRYÿj¾„"xÚÓµ^|ë(öÌìW)ß]{%Žq|¿„ã²Þð ã¿Á!™i_Ût¾ÿÿPK!Û!| O6docProps/app.xml ¢( ÔTÏ‹Ó@¾ þ!'=l§­uYÊté²ô຅f×󘼴ƒéL˜ë®'7YÙƒ¢DÖ›ÿÿ _’þt‹ 7CßûÞ7/o¾¼º}4N¼ h#”ìøZÝ÷@†*rØñ‚Ý-ß3–ˈ'JBÇ?ão³Ë—h_«´`<,!MÇY›¶ 1áÆÜÔ0-1+=æC=$*ŽE;*¼7iI³^ß$pdAFm¤ó‚~U±=±ÿZ4RaÑŸ9 ŽSl˜Ñ@Yžb ¬±EÉ"¢·•Ž knRR!z#Mr‹~°=jeTl½ý²s¯¯îƒî+!-%ËBt 
n©\¶[î˜ýxöùçË×îäüûÓS—=q'/\væ]iµ¯]¥džö¹æCÍÓ‘a&J!$"ÃZ”L½¥,uJ*@{"Š@N³H¯Äto¯›ˆ´ÔÏ „<.ºÄbžÀÒs‚ö€ÐçBF'¶=Ð*íñg å{w¸ÂÛŽ?áZpiÑãBV%NRc5sù#—qù+J0_q%\–.cÑbR€àÂiýì“Ëß¹ìË{.ûZ¢S—ŸÿÅ÷ÐVìíÂ÷ ²Ú76²êH lf?Æd×t}Ù ²ÏÊžªåé,}{x†ïZæN-æÍsùs—¿wzùaº(ûèò·.oõ¿[ºâòo¾Þò®9HµÃ-ÌÆt•¤ƒ×á2Ë/Úà ÕIQ¤;ârÑLs1QœûÃêdV­ŽOyÄg\qdgûÿÿPK! ÂÙ¹docProps/core.xml ¢( Œ’±nÛ0†÷}»LRnb—e  2Õ€ºHÐ%Ï6‰H6ŠW è+ô²dë%o£ %Ûª‚vÈxüÿûøß‘éü¡È£{°N=CtDPZ©ôf†¾­®ã)ŠœçZòÜh˜¡84Ï>œ¥¢dÂXXZS‚õ \HÚ1QÎÐÖû’aìÄ îFÁ¡ƒ¸6¶à>”vƒK.îøpBÈ.ÀsÉ=Ç-0.{":"¥è‘åO›w)0äP€öÓŽláþÛÐ)g¡ü® 3ãÙRÄÞýàTo¬ªjT»!?Å·‹/_»Qc¥Û] @Y*óÊç-Mvi”öQSÿnê§fÿÒÔšúWS¿4ûç¦~ eŠû†¶UXàÞØLÇNý«éäÓa»ûœ;¿Ï´V ?ï¾µÖná^µOœÑ‹ëpY·–à £0(;¬å¤ÜŒ/¯V×(KÄ$‰)Y‘)#çŒÒïm®7ýíà‡ƒâ˜îÄd²"–|bÏÄ ë¿ýlÙ+ÿÿPK-!kÀ†î2[Content_Types].xmlPK-!høt¡â '_rels/.relsPK-!“¿EšÙ¾ ]ppt/slides/_rels/slide1.xml.relsPK-!c\#´Á7 tppt/slides/_rels/slide2.xml.relsPK-!c\#´Á7 s ppt/slides/_rels/slide3.xml.relsPK-!¯ÚD Ù¾ r ppt/slides/_rels/slide4.xml.relsPK-!Ýó¶+_‰ ppt/_rels/presentation.xml.relsPK-! 'ÿÔù ppt/presentation.xmlPK-!‘ŒÏ6'A?ppt/slides/slide2.xmlPK-!1—Êâ½­ 8ppt/slides/slide1.xmlPK-!o@Ç—“q=ppt/slides/slide4.xmlPK-!‹©‹[÷&‡;Cppt/slides/slide3.xmlPK-!ÕÑ’ñ¾7,eXppt/slideLayouts/_rels/slideLayout7.xml.relsPK-!ÕÑ’ñ¾7,mYppt/slideLayouts/_rels/slideLayout4.xml.relsPK-!ÕÑ’ñ¾7,uZppt/slideLayouts/_rels/slideLayout6.xml.relsPK-!ÕÑ’ñ¾7,}[ppt/slideLayouts/_rels/slideLayout9.xml.relsPK-!ÕÑ’ñ¾7,…\ppt/slideLayouts/_rels/slideLayout8.xml.relsPK-!ÕÑ’ñ¾7-]ppt/slideLayouts/_rels/slideLayout10.xml.relsPK-!i¢_!Ç,–^ppt/slideMasters/_rels/slideMaster1.xml.relsPK-!ÕÑ’ñ¾7,þ_ppt/slideLayouts/_rels/slideLayout1.xml.relsPK-!ÕÑ’ñ¾7,appt/slideLayouts/_rels/slideLayout2.xml.relsPK-!ÕÑ’ñ¾7,bppt/slideLayouts/_rels/slideLayout5.xml.relsPK-!ÕÑ’ñ¾7-cppt/slideLayouts/_rels/slideLayout11.xml.relsPK-!·cÔ·Y"dppt/slideLayouts/slideLayout11.xmlPK-!XYJg­!ippt/slideLayouts/slideLayout3.xmlPK-!Oùªƒ?4!¼nppt/slideLayouts/slideLayout2.xmlPK-!èŸhà 
r!:sppt/slideLayouts/slideLayout1.xmlPK-!A+,]¿z6!…xppt/slideMasters/slideMaster1.xmlPK-!p¡ù&†!ƒppt/slideLayouts/slideLayout4.xmlPK-!Õy„‡‘¹$!è†ppt/slideLayouts/slideLayout5.xmlPK-!oK„J¨Ñ !¸ppt/slideLayouts/slideLayout6.xmlPK-!Û›X¼dp"Ÿ‘ppt/slideLayouts/slideLayout10.xmlPK-!Ëg_)†¤!C–ppt/slideLayouts/slideLayout9.xmlPK-!ì#?ð¡!œppt/slideLayouts/slideLayout8.xmlPK-!ŠK¶e/!7¢ppt/slideLayouts/slideLayout7.xmlPK-!ÕÑ’ñ¾7,Û¥ppt/slideLayouts/_rels/slideLayout3.xml.relsPK-!À¥ºã¦ppt/theme/theme1.xmlPK-!ân"øEÈŽº­ppt/media/image1.emfPK- !ý1*¢¢äóppt/media/image2.pngPK- !—t‰TT¸ûdocProps/thumbnail.jpegPK-!95ì9íOppt/presProps.xmlPK-!Øý¬¶¬Qppt/tableStyles.xmlPK-!·DTœX‰Rppt/viewProps.xmlPK-!Û!| O6TTdocProps/app.xmlPK-! ÂÙ¹ÙWdocProps/core.xmlPK--„ ŸZsleef-3.3.1/include/000077500000000000000000000000001333715643700142535ustar00rootroot00000000000000sleef-3.3.1/include/sleefdft.h000066400000000000000000000062521333715643700162250ustar00rootroot00000000000000#ifndef __SLEEFDFT_H__ #define __SLEEFDFT_H__ #include #include #define SLEEF_MODE_FORWARD (0 << 0) #define SLEEF_MODE_BACKWARD (1 << 0) #define SLEEF_MODE_COMPLEX (0 << 1) #define SLEEF_MODE_REAL (1 << 1) #define SLEEF_MODE_ALT (1 << 2) #define SLEEF_MODE_FFTWCOMPAT (1 << 3) #define SLEEF_MODE_DEBUG (1 << 10) #define SLEEF_MODE_VERBOSE (1 << 11) #define SLEEF_MODE_NO_MT (1 << 12) #define SLEEF_MODE_ESTIMATE (1 << 20) #define SLEEF_MODE_MEASURE (2 << 20) #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllexport) #else // #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllimport) #if (defined(_MSC_VER)) #pragma comment(lib,"sleefdft.lib") #endif // #if (defined(_MSC_VER)) #endif // #ifdef IMPORT_IS_EXPORT #else // #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #define IMPORT #endif // #if (defined(__MINGW32__) || defined(__MINGW64__) 
|| defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) IMPORT struct SleefDFT *SleefDFT_double_init1d(uint32_t n, const double *in, double *out, uint64_t mode); IMPORT struct SleefDFT *SleefDFT_double_init2d(uint32_t n, uint32_t m, const double *in, double *out, uint64_t mode); IMPORT void SleefDFT_double_execute(struct SleefDFT *ptr, const double *in, double *out); IMPORT struct SleefDFT *SleefDFT_float_init1d(uint32_t n, const float *in, float *out, uint64_t mode); IMPORT struct SleefDFT *SleefDFT_float_init2d(uint32_t n, uint32_t m, const float *in, float *out, uint64_t mode); IMPORT void SleefDFT_float_execute(struct SleefDFT *ptr, const float *in, float *out); IMPORT struct SleefDFT *SleefDFT_longdouble_init1d(uint32_t n, const long double *in, long double *out, uint64_t mode); IMPORT struct SleefDFT *SleefDFT_longdouble_init2d(uint32_t n, uint32_t m, const long double *in, long double *out, uint64_t mode); IMPORT void SleefDFT_longdouble_execute(struct SleefDFT *ptr, const long double *in, long double *out); #if defined(ENABLEFLOAT128) && !defined(Sleef_quad2_DEFINED) #define Sleef_quad2_DEFINED typedef __float128 Sleef_quad; typedef struct { Sleef_quad x, y; } Sleef_quad2; #endif #if defined(Sleef_quad2_DEFINED) IMPORT struct SleefDFT *SleefDFT_quad_init1d(uint32_t n, const Sleef_quad *in, Sleef_quad *out, uint64_t mode); IMPORT struct SleefDFT *SleefDFT_quad_init2d(uint32_t n, uint32_t m, const Sleef_quad *in, Sleef_quad *out, uint64_t mode); IMPORT void SleefDFT_quad_execute(struct SleefDFT *ptr, const Sleef_quad *in, Sleef_quad *out); #endif IMPORT void SleefDFT_dispose(struct SleefDFT *ptr); IMPORT void SleefDFT_setPath(struct SleefDFT *ptr, char *pathStr); // IMPORT void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode); #define SLEEF_PLAN_AUTOMATIC 0 #define SLEEF_PLAN_READONLY (1 << 0) #define SLEEF_PLAN_RESET (1 << 1) #define SLEEF_PLAN_BUILDALLPLAN (1 << 2) #define SLEEF_PLAN_NOLOCK (1 << 3) #define 
SLEEF_PLAN_MEASURE (1 << 29) #define SLEEF_PLAN_REFERTOENVVAR (1 << 30) #undef IMPORT #endif sleef-3.3.1/sleef-config.h.in000066400000000000000000000006701333715643700157520ustar00rootroot00000000000000// Configuration of @PROJECT_NAME@ ///////////////////////////////////////////// #ifndef CONFIG_H #define CONFIG_H #define SLEEF_VERSION_MAJOR @SLEEF_VERSION_MAJOR@ #define SLEEF_VERSION_MINOR @SLEEF_VERSION_MINOR@ // FEATURE DETECTION *********************************************************** #cmakedefine COMPILER_SUPPORTS_LONG_DOUBLE #cmakedefine COMPILER_SUPPORTS_FLOAT128 #cmakedefine COMPILER_SUPPORTS_SSE2 #endif // CONFIG_H sleef-3.3.1/src/000077500000000000000000000000001333715643700134175ustar00rootroot00000000000000sleef-3.3.1/src/CMakeLists.txt000066400000000000000000000004521333715643700161600ustar00rootroot00000000000000include_directories("common") include_directories("arch") add_subdirectory("libm") if (BUILD_TESTS) add_subdirectory("libm-tester") endif() add_subdirectory("common") if (BUILD_DFT AND NOT MINGW) add_subdirectory("dft") if (BUILD_TESTS) add_subdirectory("dft-tester") endif() endif() sleef-3.3.1/src/arch/000077500000000000000000000000001333715643700143345ustar00rootroot00000000000000sleef-3.3.1/src/arch/helperadvsimd.h000066400000000000000000000554701333715643700173470ustar00rootroot00000000000000/*********************************************************************/ /* Copyright ARM Ltd. 2010 - 2017. */ /* Distributed under the Boost Software License, Version 1.0. */ /* (See accompanying file LICENSE.txt or copy at */ /* http://www.boost.org/LICENSE_1_0.txt) */ /*********************************************************************/ #ifndef __ARM_NEON #error Please specify advsimd flags. 
#endif #include #include #include "misc.h" #define ENABLE_DP #define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP #define ENABLE_SP #define LOG2VECTLENSP 2 #define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP #define FULL_FP_ROUNDING #define ACCURATE_SQRT #define ISANAME "AArch64 AdvSIMD" // Mask definition typedef uint32x4_t vmask; typedef uint32x4_t vopmask; // Single precision definitions typedef float32x4_t vfloat; typedef int32x4_t vint2; // Double precision definitions typedef float64x2_t vdouble; typedef int32x2_t vint; #define DFTPRIORITY 10 static INLINE int vavailability_i(int name) { return 3; } static INLINE void vprefetch_v_p(const void *ptr) { } static INLINE int vtestallones_i_vo32(vopmask g) { uint32x2_t x0 = vand_u32(vget_low_u32(g), vget_high_u32(g)); uint32x2_t x1 = vpmin_u32(x0, x0); return vget_lane_u32(x1, 0); } static INLINE int vtestallones_i_vo64(vopmask g) { uint32x2_t x0 = vand_u32(vget_low_u32(g), vget_high_u32(g)); uint32x2_t x1 = vpmin_u32(x0, x0); return vget_lane_u32(x1, 0); } // Vector load / store static INLINE vdouble vload_vd_p(const double *ptr) { return vld1q_f64(ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return vld1q_f64(ptr); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { vst1q_f64(ptr, v); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { vst1q_f64(ptr, v); } static INLINE vfloat vload_vf_p(const float *ptr) { return vld1q_f32(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return vld1q_f32(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); } static INLINE vint2 vloadu_vi2_p(int32_t *p) { return vld1q_s32(p); } static INLINE void vstoreu_v_p_vi2(int32_t *p, vint2 v) { vst1q_s32(p, v); } static INLINE vint vloadu_vi_p(int32_t *p) { return vld1_s32(p); } static INLINE void vstoreu_v_p_vi(int32_t *p, vint v) { 
vst1_s32(p, v); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return ((vdouble) { ptr[vget_lane_s32(vi, 0)], ptr[vget_lane_s32(vi, 1)]} ); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return ((vfloat) { ptr[vgetq_lane_s32(vi2, 0)], ptr[vgetq_lane_s32(vi2, 1)], ptr[vgetq_lane_s32(vi2, 2)], ptr[vgetq_lane_s32(vi2, 3)] }); } // Basic logical operations for mask static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vorrq_u32(x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return veorq_u32(x, y); } // Mask <--> single precision reinterpret static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return vreinterpretq_u32_f32(vf); } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return vreinterpretq_f32_u32(vm); } static INLINE vint2 vcast_vi2_vm(vmask vm) { return vreinterpretq_s32_u32(vm); } static INLINE vmask vcast_vm_vi2(vint2 vi) { return vreinterpretq_u32_s32(vi); } // Mask <--> double precision reinterpret static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return vreinterpretq_u32_f64(vd); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return vreinterpretq_f64_u32(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return vreinterpretq_f32_s32(vm); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vreinterpretq_s32_f32(vf); } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return vreinterpretq_s32_f64(vd); } /****************************************/ /* Single precision FP operations */ /****************************************/ // Broadcast static INLINE vfloat vcast_vf_f(float f) { return vdupq_n_f32(f); } // Add, Sub, Mul, Reciprocal 1/x, Division, Square root static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return vaddq_f32(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) 
{ return vsubq_f32(x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return vmulq_f32(x, y); } static INLINE vfloat vrec_vf_vf(vfloat d) { return vdivq_f32(vcast_vf_f(1.0f), d); } static INLINE vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) { return vdivq_f32(n, d); } static INLINE vfloat vsqrt_vf_vf(vfloat d) { return vsqrtq_f32(d); } // Multiply accumulate: z = z + x * y static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vfmaq_f32(z, x, y); } // Multiply subtract: z = z = x * y static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vfmsq_f32(z, x, y); } // |x|, -x static INLINE vfloat vabs_vf_vf(vfloat f) { return vabsq_f32(f); } static INLINE vfloat vneg_vf_vf(vfloat f) { return vnegq_f32(f); } // max, min static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return vmaxq_f32(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vminq_f32(x, y); } // Comparisons static INLINE vmask veq_vm_vf_vf(vfloat x, vfloat y) { return vceqq_f32(x, y); } static INLINE vmask vneq_vm_vf_vf(vfloat x, vfloat y) { return vmvnq_u32(vceqq_f32(x, y)); } static INLINE vmask vlt_vm_vf_vf(vfloat x, vfloat y) { return vcltq_f32(x, y); } static INLINE vmask vle_vm_vf_vf(vfloat x, vfloat y) { return vcleq_f32(x, y); } static INLINE vmask vgt_vm_vf_vf(vfloat x, vfloat y) { return vcgtq_f32(x, y); } static INLINE vmask vge_vm_vf_vf(vfloat x, vfloat y) { return vcgeq_f32(x, y); } // Conditional select static INLINE vfloat vsel_vf_vm_vf_vf(vmask mask, vfloat x, vfloat y) { return vbslq_f32(mask, x, y); } // int <--> float conversions static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcvtq_s32_f32(vf); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return vcvtq_f32_s32(vi); } static INLINE vint2 vcast_vi2_i(int i) { return vdupq_n_s32(i); } static INLINE vint2 vrint_vi2_vf(vfloat d) { return vcvtq_s32_f32(vrndnq_f32(d)); } /***************************************/ /* Single precision integer operations */ 
/***************************************/ // Add, Sub, Neg (-x) static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vaddq_s32(x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsubq_s32(x, y); } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vnegq_s32(e); } // Logical operations static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vandq_s32(x, y); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vbicq_s32(y, x); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vorrq_s32(x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return veorq_s32(x, y); } // Shifts #define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c) #define vsrl_vi2_vi2_i(x, c) \ vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c)) #define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c) #define vsra_vi_vi_i(x, c) vshr_n_s32(x, c) #define vsll_vi_vi_i(x, c) vshl_n_s32(x, c) #define vsrl_vi_vi_i(x, c) \ vreinterpret_s32_u32(vshr_n_u32(vreinterpret_u32_s32(x), c)) // Comparison returning masks static INLINE vmask veq_vm_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); } static INLINE vmask vgt_vm_vi2_vi2(vint2 x, vint2 y) { return vcgeq_s32(x, y); } // Comparison returning integers static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return vreinterpretq_s32_u32(vcgeq_s32(x, y)); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return vreinterpretq_s32_u32(vceqq_s32(x, y)); } // Conditional select static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { return vbslq_s32(m, x, y); } /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /****************************************/ /* Double precision FP operations */ 
/****************************************/ // Broadcast static INLINE vdouble vcast_vd_d(double f) { return vdupq_n_f64(f); } // Add, Sub, Mul, Reciprocal 1/x, Division, Square root static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return vaddq_f64(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return vsubq_f64(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return vmulq_f64(x, y); } static INLINE vdouble vrec_vd_vd(vdouble d) { return vdivq_f64(vcast_vd_d(1.0f), d); } static INLINE vdouble vdiv_vd_vd_vd(vdouble n, vdouble d) { return vdivq_f64(n, d); } static INLINE vdouble vsqrt_vd_vd(vdouble d) { return vsqrtq_f64(d); } // |x|, -x static INLINE vdouble vabs_vd_vd(vdouble f) { return vabsq_f64(f); } static INLINE vdouble vneg_vd_vd(vdouble f) { return vnegq_f64(f); } // max, min static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return vmaxq_f64(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return vminq_f64(x, y); } // Multiply accumulate: z = z + x * y static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vfmaq_f64(z, x, y); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vfmsq_f64(z, x, y); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z + x * y return vfmaq_f64(z, x, y); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z - x * y return vfmsq_f64(z, x, y); } //[z = x * y - z] static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vneg_vd_vd(vfmanp_vd_vd_vd_vd(x, y, z)); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // x * y - z return vneg_vd_vd(vfmanp_vd_vd_vd_vd(x, y, z)); } static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // z + x * y return vfmaq_f32(z, x, y); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // z - x * y return vfmsq_f32(z, x, 
y); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { // x * y - z return vneg_vf_vf(vfmanp_vf_vf_vf_vf(x, y, z)); } /* Comparisons */ static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vceqq_f64(x, y)); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(x, y))); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vcltq_f64(x, y)); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vcgtq_f64(x, y)); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vcleq_f64(x, y)); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpretq_u32_u64(vcgeq_f64(x, y)); } // Conditional select static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { return vbslq_f64(vreinterpretq_u64_u32(mask), x, y); } #if 1 static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } #else // This implementation is slower on the current CPU models (as of May 2017.) // I(Naoki Shibata) expect that on future CPU models with hardware similar to Super Shuffle Engine, this implementation will be faster. 
static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double d0, double d1) { uint8x16_t idx = vbslq_u8(vreinterpretq_u8_u32(o), (uint8x16_t) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }, (uint8x16_t) { 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 }); uint8x16_t tab = (uint8x16_t) (float64x2_t) { d0, d1 }; return (vdouble) vqtbl1q_u8(tab, idx); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { uint8x16_t idx = vbslq_u8(vreinterpretq_u8_u32(o0), (uint8x16_t) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 }, vbslq_u8(vreinterpretq_u8_u32(o1), (uint8x16_t) { 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 }, vbslq_u8(vreinterpretq_u8_u32(o2), (uint8x16_t) { 16, 17, 18, 19, 20, 21, 22, 23, 16, 17, 18, 19, 20, 21, 22, 23 }, (uint8x16_t) { 24, 25, 26, 27, 28, 29, 30, 31, 24, 25, 26, 27, 28, 29, 30, 31 }))); uint8x16x2_t tab = { { (uint8x16_t) (float64x2_t) { d0, d1 }, (uint8x16_t) (float64x2_t) { d2, d3 } } }; return (vdouble) vqtbl2q_u8(tab, idx); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2); } #endif static INLINE vdouble vrint_vd_vd(vdouble d) { return vrndnq_f64(d); } static INLINE vfloat vrint_vf_vf(vfloat d) { return vrndnq_f32(d); } /****************************************/ /* int <--> float conversions */ /****************************************/ static INLINE vint vtruncate_vi_vd(vdouble vf) { return vmovn_s64(vcvtq_s64_f64(vf)); } static INLINE vdouble vcast_vd_vi(vint vi) { return vcvtq_f64_s64(vmovl_s32(vi)); } static INLINE vint vcast_vi_i(int i) { return vdup_n_s32(i); } static INLINE vint vrint_vi_vd(vdouble d) { return vqmovn_s64(vcvtq_s64_f64(vrndnq_f64(d))); } /***************************************/ /* Integer operations */ /***************************************/ // Add, Sub, Neg (-x) static INLINE vint 
vadd_vi_vi_vi(vint x, vint y) { return vadd_s32(x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return vsub_s32(x, y); } static INLINE vint vneg_vi_vi(vint e) { return vneg_s32(e); } // Logical operations static INLINE vint vand_vi_vi_vi(vint x, vint y) { return vand_s32(x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return vbic_s32(y, x); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return vorr_s32(x, y); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return veor_s32(x, y); } // Comparison returning masks static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return vcombine_u32(vceq_s32(x, y), vdup_n_u32(0)); } // Conditional select static INLINE vint vsel_vi_vm_vi_vi(vmask m, vint x, vint y) { return vbsl_s32(vget_low_u32(m), x, y); } /***************************************/ /* Predicates */ /***************************************/ static INLINE vopmask visinf_vo_vd(vdouble d) { const float64x2_t inf = vdupq_n_f64(SLEEF_INFINITY); const float64x2_t neg_inf = vdupq_n_f64(-SLEEF_INFINITY); uint64x2_t cmp = vorrq_u64(vceqq_f64(d, inf), vceqq_f64(d, neg_inf)); return vreinterpretq_u32_u64(cmp); } static INLINE vopmask visnan_vo_vd(vdouble d) { return vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(d, d))); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return vreinterpretq_u32_u64(vceqq_f64(d, vdupq_n_f64(SLEEF_INFINITY))); } static INLINE vopmask visminf_vo_vd(vdouble d) { return vreinterpretq_u32_u64(vceqq_f64(d, vdupq_n_f64(-SLEEF_INFINITY))); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { return vbslq_f32(mask, x, y); } static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, 
vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vceqq_f32(x, y); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vmvnq_u32(vceqq_f32(x, y)); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vcltq_f32(x, y); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vcleq_f32(x, y); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vcgtq_f32(x, y); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vcgeq_f32(x, y); } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return vcgtq_s32(x, y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return vcombine_u32(vcgt_s32(x, y), vdup_n_u32(0)); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } static INLINE vopmask vcast_vo32_vo64(vopmask m) { return vuzpq_u32(m, m).val[0]; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return vzipq_u32(m, m).val[0]; } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vandq_u32(x, y); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vbicq_u32(y, x); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vorrq_u32(x, y); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return veorq_u32(x, y); } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return vbslq_s32(m, x, y); } static INLINE vint2 
vand_vi2_vo_vi2(vopmask x, vint2 y) { return vandq_s32(vreinterpretq_s32_u32(x), y); } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vbicq_s32(y, vreinterpretq_s32_u32(x)); } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return vbic_s32(y, vget_low_s32(vreinterpretq_s32_u32(x))); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vorrq_u32(x, y); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vorrq_u32(x, y); } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return veorq_u32(x, y); } static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vrndq_f32(vd); } static INLINE vmask vcast_vm_i_i(int i0, int i1) { return vreinterpretq_u32_u64(vdupq_n_u64((0xffffffff & (uint64_t)i1) | (((uint64_t)i0) << 32))); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return vreinterpretq_u32_u64(vceqq_s64(vreinterpretq_s64_u32(x), vreinterpretq_s64_u32(y))); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return vreinterpretq_u32_s64(vaddq_s64(vreinterpretq_s64_u32(x), vreinterpretq_s64_u32(y))); } static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return vbsl_s32(vget_low_u32(m), x, y); } // Logical operations static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return vand_s32(vreinterpret_s32_u32(vget_low_u32(x)), y); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return vreinterpretq_s32_u32(vrev64q_u32(vreinterpretq_u32_u64(vmovl_u32(vreinterpret_u32_s32(vi))))); } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return vreinterpret_s32_u32(vmovn_u64(vreinterpretq_u64_u32(vrev64q_u32(vreinterpretq_u32_s32(vi2))))); } static INLINE 
vdouble vreinterpret_vd_vi2(vint2 vi) { return vreinterpretq_f64_s32(vi); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vrndq_f64(vd); } // #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)PNMASKf); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)NPMASKf); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vdouble vrev21_vd_vd(vdouble d0) { return (float64x2_t)vcombine_u64(vget_high_u64((uint64x2_t)d0), vget_low_u64((uint64x2_t)d0)); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { vstore_v_p_vd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } static INLINE vfloat vrev21_vf_vf(vfloat d0) { return vrev64q_f32(d0); } static INLINE vfloat 
vreva2_vf_vf(vfloat d0) { return vcombine_f32(vget_high_f32(d0), vget_low_f32(d0)); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v)); vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v)); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v)); vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v)); } sleef-3.3.1/src/arch/helperavx.h000066400000000000000000000623341333715643700165130ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 #if !defined(__AVX__) #error Please specify -mavx. #endif #elif CONFIG == 4 #if !defined(__AVX__) || !defined(__FMA4__) #error Please specify -mavx and -mfma4. 
#endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP #define LOG2VECTLENDP 2 #define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) #define FULL_FP_ROUNDING #define ACCURATE_SQRT #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" typedef __m256i vmask; typedef __m256i vopmask; typedef __m256d vdouble; typedef __m128i vint; typedef __m256 vfloat; typedef struct { __m128i x, y; } vint2; // #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static int cpuSupportsAVX() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 28)) != 0; } static int cpuSupportsFMA4() { int32_t reg[4]; Sleef_x86CpuID(reg, 0x80000001, 0); return (reg[3] & (1 << 16)) != 0; } #if CONFIG == 4 && defined(__AVX__) && defined(__FMA4__) static INLINE int vavailability_i(int name) { //int d = __builtin_cpu_supports("avx") && __builtin_cpu_supports("fma4"); int d = cpuSupportsAVX() && cpuSupportsFMA4(); return d ? 3 : 0; } //typedef vint2 vint2_fma4; #define ENABLE_FMA_DP #define ENABLE_FMA_SP #define ISANAME "AVX + AMD FMA4" #define DFTPRIORITY 21 #else static INLINE int vavailability_i(int name) { int d = cpuSupportsAVX(); return d ? 
3 : 0; } //typedef vint2 vint2_avx; #define ISANAME "AVX" #define DFTPRIORITY 20 #endif static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))); } static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))); } // static INLINE vdouble vcast_vd_d(double d) { return _mm256_set1_pd(d); } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm256_castpd_si256(vd); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm256_castsi256_pd(vm); } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { vint2 r; r.x = _mm256_castsi256_si128(vreinterpret_vm_vd(vd)); r.y = _mm256_extractf128_si256(vreinterpret_vm_vd(vd), 1); return r; } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { vmask m = _mm256_castsi128_si256(vi.x); m = _mm256_insertf128_si256(m, vi.y, 1); return vreinterpret_vd_vm(m); } // static vint2 vloadu_vi2_p(int32_t *p) { vint2 r; r.x = _mm_loadu_si128((__m128i *) p ); r.y = _mm_loadu_si128((__m128i *)(p + 4)); return r; } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *) p , v.x); _mm_storeu_si128((__m128i *)(p + 4), v.y); } static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask 
vxor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vcast_vo32_vo64(vopmask o) { return 
_mm256_castsi128_si256(_mm256_cvtpd_epi32(_mm256_and_pd(vreinterpret_vd_vm(o), _mm256_set1_pd(-1.0)))); } static INLINE vopmask vcast_vo64_vo32(vopmask o) { return vreinterpret_vm_vd(_mm256_cmp_pd(_mm256_cvtepi32_pd(_mm256_castsi256_si128(o)), _mm256_set1_pd(-1.0), _CMP_EQ_OQ)); } // static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm256_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm256_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); } static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); } static INLINE vint2 vcastu_vi2_vi(vint vi) { vint2 r; r.x = _mm_and_si128(_mm_shuffle_epi32(vi, 0x40), _mm_set_epi32(-1, 0, -1, 0)); r.y = _mm_and_si128(_mm_shuffle_epi32(vi, 0xc8), _mm_set_epi32(-1, 0, -1, 0)); return r; } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_or_si128(_mm_and_si128(_mm_shuffle_epi32(vi.x, 0x0d), _mm_set_epi32( 0, 0, -1, -1)), _mm_and_si128(_mm_shuffle_epi32(vi.y, 0xd0), _mm_set_epi32(-1, -1, 0, 0))); } static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm256_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_cmp_pd(vreinterpret_vd_vm(vxor_vm_vm_vm(vxor_vm_vm_vm(x, y), vreinterpret_vm_vd(_mm256_set1_pd(1.0)))), _mm256_set1_pd(1.0), _CMP_EQ_OQ)); } // static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble 
x, vdouble y) { return _mm256_sub_pd(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); } static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set1_pd(1), x); } static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); } static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm256_andnot_pd(_mm256_set1_pd(-0.0), d); } static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm256_xor_pd(_mm256_set1_pd(-0.0), d); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); } #if CONFIG == 1 static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } #else static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_macc_pd(x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_msub_pd(x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmacc_pd(x, y, z); } static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_nmsub_pd(x, y, z); } #endif static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_EQ_OQ)); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return 
vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_NEQ_UQ)); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LT_OQ)); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LE_OQ)); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GT_OQ)); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GE_OQ)); } // static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); } static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); } static INLINE vint vandnot_vi_vo_vi(vopmask m, vint y) { return _mm_andnot_si128(_mm256_castsi256_si128(m), y); } static INLINE vint vand_vi_vo_vi(vopmask m, vint y) { return _mm_and_si128(_mm256_castsi256_si128(m), y); } static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); } static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); } static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpeq_epi32(x, y)); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpgt_epi32(x, y)); } static INLINE vint 
vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) { return _mm_blendv_epi8(y, x, _mm256_castsi256_si128(o)); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm256_blendv_pd(y, x, _mm256_castsi256_pd(o)); } static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } static INLINE vopmask visinf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask visminf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask visnan_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm256_cmp_pd(d, d, _CMP_NEQ_UQ)); } static INLINE vdouble vload_vd_p(const double *ptr) { return _mm256_load_pd(ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm256_loadu_pd(ptr); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm256_store_pd(ptr, v); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm256_storeu_pd(ptr, v); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { int a[VECTLENDP]; vstoreu_v_p_vi(a, vi); return _mm256_set_pd(ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); } #if defined(_MSC_VER) // This function is needed when debugging on MSVC. 
static INLINE double vcast_d_vd(vdouble v) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); return a[0]; } #endif // static INLINE vint2 vcast_vi2_vm(vmask vm) { vint2 r; r.x = _mm256_castsi256_si128(vm); r.y = _mm256_extractf128_si256(vm, 1); return r; } static INLINE vmask vcast_vm_vi2(vint2 vi) { vmask m = _mm256_castsi128_si256(vi.x); m = _mm256_insertf128_si256(m, vi.y, 1); return m; } static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvtps_epi32(vf)); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvttps_epi32(vf)); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps(vcast_vm_vi2(vi)); } static INLINE vfloat vcast_vf_f(float f) { return _mm256_set1_ps(f); } static INLINE vint2 vcast_vi2_i(int i) { vint2 r; r.x = r.y = _mm_set1_epi32(i); return r; } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm256_castps_si256(vf); } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm256_castsi256_ps(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return vreinterpret_vf_vm(vcast_vm_vi2(vi)); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vcast_vi2_vm(vreinterpret_vm_vf(vf)); } static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); } static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); } static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), 
vreinterpret_vm_vf(d))); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); } #if CONFIG == 1 static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } #else static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmacc_ps(x, y, z); } static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); } static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_macc_ps(x, y, z); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_msub_ps(x, y, z); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmacc_ps(x, y, z); } static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_nmsub_ps(x, y, z); } #endif static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_EQ_OQ)); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_NEQ_UQ)); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LT_OQ)); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LE_OQ)); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GT_OQ)); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GE_OQ)); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 vi = { 
_mm_add_epi32(x.x, y.x), _mm_add_epi32(x.y, y.y) }; return vi; } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 vi = { _mm_sub_epi32(x.x, y.x), _mm_sub_epi32(x.y, y.y) }; return vi; } static INLINE vint2 vneg_vi2_vi2(vint2 e) { vint2 vi = { _mm_sub_epi32(_mm_set1_epi32(0), e.x), _mm_sub_epi32(_mm_set1_epi32(0), e.y) }; return vi; } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 vi = { _mm_and_si128(x.x, y.x), _mm_and_si128(x.y, y.y) }; return vi; } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 vi = { _mm_andnot_si128(x.x, y.x), _mm_andnot_si128(x.y, y.y) }; return vi; } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 vi = { _mm_or_si128(x.x, y.x), _mm_or_si128(x.y, y.y) }; return vi; } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 vi = { _mm_xor_si128(x.x, y.x), _mm_xor_si128(x.y, y.y) }; return vi; } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi2_vi2_vi2(vcast_vi2_vm(x), y); } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi2_vi2_vi2(vcast_vi2_vm(x), y); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { vint2 vi = { _mm_slli_epi32(x.x, c), _mm_slli_epi32(x.y, c) }; return vi; } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { vint2 vi = { _mm_srli_epi32(x.x, c), _mm_srli_epi32(x.y, c) }; return vi; } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { vint2 vi = { _mm_srai_epi32(x.x, c), _mm_srai_epi32(x.y, c) }; return vi; } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = _mm_cmpeq_epi32(x.x, y.x); r.y = _mm_cmpeq_epi32(x.y, y.y); return vcast_vm_vi2(r); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = _mm_cmpgt_epi32(x.x, y.x); r.y = _mm_cmpgt_epi32(x.y, y.y); return vcast_vm_vi2(r); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { vint2 r; r.x = _mm_cmpeq_epi32(x.x, y.x); r.y = _mm_cmpeq_epi32(x.y, y.y); return r; } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 
x, vint2 y) { vint2 r; r.x = _mm_cmpgt_epi32(x.x, y.x); r.y = _mm_cmpgt_epi32(x.y, y.y); return r; } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { vint2 n = vcast_vi2_vm(m); vint2 r = { _mm_blendv_epi8(y.x, x.x, n.x), _mm_blendv_epi8(y.y, x.y, n.y) }; return r; } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { vint2 ix = vcast_vi2_vm(x), iy = vcast_vi2_vm(y), iz; iz.x = _mm_add_epi64(ix.x, iy.x); iz.y = _mm_add_epi64(ix.y, iy.y); return vcast_vm_vi2(iz); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return _mm256_blendv_ps(y, x, _mm256_castsi256_ps(o)); } static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } // static INLINE vfloat vload_vf_p(const float *ptr) { return _mm256_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm256_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm256_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm256_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { int a[VECTLENSP]; 
vstoreu_v_p_vi2(a, vi2); return _mm256_set_ps(ptr[a[7]], ptr[a[6]], ptr[a[5]], ptr[a[4]], ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); } #ifdef _MSC_VER // This function is needed when debugging on MSVC. static INLINE float vcast_f_vf(vfloat v) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); return a[0]; } #endif // #define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_addsub_ps(x, y); } #if CONFIG == 1 static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } #else static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } #endif static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm256_shuffle_pd(d0, d0, (0 << 3) | (1 << 2) 
| (0 << 1) | (1 << 0)); } static INLINE vdouble vreva2_vd_vd(vdouble d0) { d0 = _mm256_permute2f128_pd(d0, d0, 1); return _mm256_shuffle_pd(d0, d0, (1 << 3) | (0 << 2) | (1 << 1) | (0 << 0)); } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm256_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_store_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); _mm_store_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); } // static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm256_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } static INLINE vfloat vreva2_vf_vf(vfloat d0) { d0 = _mm256_permute2f128_ps(d0, d0, 1); return _mm256_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm256_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); _mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); _mm_storeh_pd((double *)(ptr+(offset + step * 3)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } sleef-3.3.1/src/arch/helperavx2.h000066400000000000000000000554161333715643700166000ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 #ifndef __AVX2__ #error Please specify -mavx2. #endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP #define LOG2VECTLENDP 2 #define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP #define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP #define FULL_FP_ROUNDING #define SPLIT_KERNEL #define ACCURATE_SQRT #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" typedef __m256i vmask; typedef __m256i vopmask; typedef __m256d vdouble; typedef __m128i vint; typedef __m256 vfloat; typedef __m256i vint2; // #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static int cpuSupportsAVX2() { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); return (reg[1] & (1 << 5)) != 0; } static int cpuSupportsFMA() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 12)) != 0; } #if CONFIG == 1 && defined(__AVX2__) static INLINE int vavailability_i(int name) { int d = cpuSupportsAVX2() && cpuSupportsFMA(); return d ? 
3 : 0; } #define ISANAME "AVX2" #define DFTPRIORITY 25 #endif static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))); } static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_test_all_ones(_mm_and_si128(_mm256_extractf128_si256(g, 0), _mm256_extractf128_si256(g, 1))); } // static INLINE vdouble vcast_vd_d(double d) { return _mm256_set1_pd(d); } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm256_castpd_si256(vd); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm256_castsi256_pd(vm); } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm256_castpd_si256(vd); } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm256_castsi256_pd(vi); } // static vint2 vloadu_vi2_p(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm256_storeu_si256((__m256i *)p, v); } static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask 
vandnot_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm256_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vcast_vo32_vo64(vopmask o) { return _mm256_permutevar8x32_epi32(o, _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0)); } static INLINE vopmask vcast_vo64_vo32(vopmask o) { return _mm256_permutevar8x32_epi32(o, _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0)); } // static INLINE vint vrint_vi_vd(vdouble vd) { return _mm256_cvtpd_epi32(vd); } static INLINE vint 
vtruncate_vi_vd(vdouble vd) { return _mm256_cvttpd_epi32(vd); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm256_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm256_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm256_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vcast_vd_vi(vint vi) { return _mm256_cvtepi32_pd(vi); } static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm256_slli_epi64(_mm256_cvtepi32_epi64(vi), 32); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_or_si128(_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm256_castsi256_si128(vi)), _mm_set1_ps(0), 0x0d)), _mm_castps_si128(_mm_shuffle_ps(_mm_set1_ps(0), _mm_castsi128_ps(_mm256_extractf128_si256(vi, 1)), 0xd0))); } static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm256_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm256_cmpeq_epi64(x, y); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm256_add_epi64(x, y); } // static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_add_pd(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm256_sub_pd(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm256_mul_pd(x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm256_div_pd(x, y); } static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm256_div_pd(_mm256_set1_pd(1), x); } static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm256_sqrt_pd(x); } static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm256_andnot_pd(_mm256_set1_pd(-0.0), d); } 
// Negation: flips the sign bit by XOR-ing with -0.0.
static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm256_xor_pd(_mm256_set1_pd(-0.0), d); }
// Multiply-add family, all mapped onto hardware FMA instructions:
//   vmla   =  x*y + z      vmlapn =  x*y - z      vmlanp = -(x*y) + z
static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmsub_pd(x, y, z); }
static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmadd_pd(x, y, z); }
// Lane-wise maximum / minimum.
static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm256_max_pd(x, y); }
static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm256_min_pd(x, y); }

// Explicit fused multiply-add variants; the two-letter suffix gives the signs
// of the product and of the addend:
//   pp =  x*y + z    pn =  x*y - z    np = -(x*y) + z    nn = -(x*y) - z
static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmadd_pd(x, y, z); }
static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fmsub_pd(x, y, z); }
static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmadd_pd(x, y, z); }
static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm256_fnmsub_pd(x, y, z); }

// Double-precision comparisons; each returns a per-lane mask. All predicates
// are the ordered, quiet forms except "not equal", which uses the unordered
// form (_CMP_NEQ_UQ).
static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_EQ_OQ)); }
static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_NEQ_UQ)); }
static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LT_OQ)); }
static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_LE_OQ)); }
static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GT_OQ)); }
static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm256_cmp_pd(x, y, _CMP_GE_OQ)); }

// 32-bit integer operations on vint (128-bit, four lanes)

static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); }
static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); }
// Integer negation computed as 0 - e.
static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); }

// Bitwise logic; "andnot" computes (~x) & y.
static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); }
static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); }
static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); }
static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); }

// Mask-with-integer logic: only the low 128 bits of the 256-bit mask are used.
static INLINE vint vandnot_vi_vo_vi(vopmask m, vint y) { return _mm_andnot_si128(_mm256_castsi256_si128(m), y); }
static INLINE vint vand_vi_vo_vi(vopmask m, vint y) { return _mm_and_si128(_mm256_castsi256_si128(m), y); }

// Shifts by the count c: logical left, logical right, arithmetic right.
static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); }
static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); }
static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); }

// Integer comparisons returning an integer vector (all-ones / all-zeros lanes).
static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); }
static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); }

// Integer comparisons returning a 256-bit mask. Only the low 128 bits carry
// the comparison result; _mm256_castsi128_si256 does not define the upper half.
static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpeq_epi32(x, y)); }
static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si256(_mm_cmpgt_epi32(x, y)); }

// Selection: result lane = x where the mask is set, y otherwise.
static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, _mm256_castsi256_si128(m)); }

static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm256_blendv_pd(y, x, _mm256_castsi256_pd(o)); }
// Selects between two scalar constants with a single in-lane permute. Each
// 128-bit lane of the source holds (v0, v1) in (low, high) order and the mask
// itself is used as the permute control.
// NOTE(review): this assumes each 64-bit mask lane is all-ones or all-zeros so
// that the control bit picks v1 for "true" and v0 for "false" - confirm.
static INLINE vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return _mm256_permutevar_pd(_mm256_set_pd(v1, v0, v1, v0), o); }

// Three-level select between four scalar constants:
//   o0 ? d0 : (o1 ? d1 : (o2 ? d2 : d3))
// Rather than blending values, the masks first choose a pair of 32-bit lane
// indices per 64-bit lane ((1,0), (3,2), (5,4) or (7,6)); those indices then
// permute the constant vector {d0, d1, d2, d3}.
static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
  __m256i v = _mm256_castpd_si256(vsel_vd_vo_vd_vd(o0, _mm256_castsi256_pd(_mm256_set_epi32(1, 0, 1, 0, 1, 0, 1, 0)),
						   vsel_vd_vo_vd_vd(o1, _mm256_castsi256_pd(_mm256_set_epi32(3, 2, 3, 2, 3, 2, 3, 2)),
								    vsel_vd_vo_vd_vd(o2, _mm256_castsi256_pd(_mm256_set_epi32(5, 4, 5, 4, 5, 4, 5, 4)),
										     _mm256_castsi256_pd(_mm256_set_epi32(7, 6, 7, 6, 7, 6, 7, 6))))));
  return _mm256_castsi256_pd(_mm256_permutevar8x32_epi32(_mm256_castpd_si256(_mm256_set_pd(d3, d2, d1, d0)), v));
}

// Two-level select o0 ? d0 : (o1 ? d1 : d2), expressed through the
// three-level form by repeating o1 and d2.
static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
  return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2);
}

// Classification predicates for doubles.
// visinf: |d| == +inf; vispinf: d == +inf; visminf: d == -inf.
static INLINE vopmask visinf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ));
}

static INLINE vopmask vispinf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ));
}

static INLINE vopmask visminf_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d, _mm256_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ));
}

// NaN test: compares d with itself using the unordered not-equal predicate.
static INLINE vopmask visnan_vo_vd(vdouble d) {
  return vreinterpret_vm_vd(_mm256_cmp_pd(d, d, _CMP_NEQ_UQ));
}

#if defined(_MSC_VER)
// This function is needed when debugging on MSVC.
// Returns lane 0 of v by spilling the whole vector to a temporary array.
static INLINE double vcast_d_vd(vdouble v) {
  double s[4];
  _mm256_storeu_pd(s, v);
  return s[0];
}
#endif

// Double-precision memory access. The non-"u" variants use the aligned
// load/store intrinsics; the "u" variants accept unaligned pointers.
static INLINE vdouble vload_vd_p(const double *ptr) { return _mm256_load_pd(ptr); }
static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm256_loadu_pd(ptr); }

static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm256_store_pd(ptr, v); }
static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm256_storeu_pd(ptr, v); }

// Gathers ptr[vi[k]] for each lane k; the scale of 8 is the byte size of a
// double, so vi holds element indices.
static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return _mm256_i32gather_pd(ptr, vi, 8); }

// Single precision: conversions and reinterpretation

// vmask and vint2 are the same underlying type here, so these are identities.
static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; }
static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; }

// float -> int32; vrint uses the conversion instruction's current rounding
// mode, vtruncate always rounds toward zero.
static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvtps_epi32(vf)); }
static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm256_cvttps_epi32(vf)); }
// int32 -> float value conversion.
static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm256_cvtepi32_ps(vcast_vm_vi2(vi)); }
// Scalar broadcasts.
static INLINE vfloat vcast_vf_f(float f) { return _mm256_set1_ps(f); }
static INLINE vint2 vcast_vi2_i(int i) { return _mm256_set1_epi32(i); }
// Bit-pattern reinterpretation (no value conversion).
static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm256_castps_si256(vf); }
static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm256_castsi256_ps(vm); }

static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return vreinterpret_vf_vm(vcast_vm_vi2(vi)); }
static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vcast_vi2_vm(vreinterpret_vm_vf(vf)); }

// Single-precision arithmetic

static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_add_ps(x, y); }
static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm256_sub_ps(x, y); }
static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm256_mul_ps(x, y); }
static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm256_div_ps(x, y); }
// Reciprocal computed with a full-precision division (1.0f / x).
static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); }
static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm256_sqrt_ps(x); }
// Absolute value: clears the sign bit via (~bits(-0.0f)) & bits(f).
static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); }
// Negation: flips the sign bit via bits(-0.0f) ^ bits(d).
static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); }
// Multiply-add family, all mapped onto hardware FMA instructions:
//   vmla   =  x*y + z      vmlapn =  x*y - z      vmlanp = -(x*y) + z
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmsub_ps(x, y, z); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmadd_ps(x, y, z); }
// Lane-wise maximum / minimum.
static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm256_max_ps(x, y); }
static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm256_min_ps(x, y); }

// Explicit fused multiply-add variants; the two-letter suffix gives the signs
// of the product and of the addend:
//   pp =  x*y + z    pn =  x*y - z    np = -(x*y) + z    nn = -(x*y) - z
static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmadd_ps(x, y, z); }
static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fmsub_ps(x, y, z); }
static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmadd_ps(x, y, z); }
static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm256_fnmsub_ps(x, y, z); }

// Single-precision comparisons; each returns a per-lane mask. All predicates
// are the ordered, quiet forms except "not equal", which uses the unordered
// form (_CMP_NEQ_UQ).
static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_EQ_OQ)); }
static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_NEQ_UQ)); }
static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LT_OQ)); }
static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_LE_OQ)); }
static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GT_OQ)); }
static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm256_cmp_ps(x, y, _CMP_GE_OQ)); }

// 32-bit integer operations on vint2 (256-bit, eight lanes)

static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_add_epi32(x, y); }
static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_sub_epi32(x, y); }
// Integer negation computed as 0 - e.
static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); }

// Bitwise logic; "andnot" computes (~x) & y.
static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_and_si256(x, y); }
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_andnot_si256(x, y); }
static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_or_si256(x, y); }
static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_xor_si256(x, y); }

// Same logic with a mask as the first operand.
static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi2_vi2_vi2(vcast_vi2_vm(x), y); }
static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi2_vi2_vi2(vcast_vi2_vm(x), y); }

// Shifts by the count c: logical left, logical right, arithmetic right.
static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return _mm256_slli_epi32(x, c); }
static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return _mm256_srli_epi32(x, c); }
static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return _mm256_srai_epi32(x, c); }

// Integer comparisons; the _vo_ and _vi2_ forms return the same bits under
// different type names.
static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpeq_epi32(x, y); }
static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpgt_epi32(x, y); }
static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpeq_epi32(x, y); }
static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm256_cmpgt_epi32(x, y); }

// Selection: result = x where the mask is set, y otherwise. The integer form
// blends at byte granularity.
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) {
  return _mm256_blendv_epi8(y, x, m);
}

static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return _mm256_blendv_ps(y, x, _mm256_castsi256_ps(o)); }

// At this point, the following three functions are implemented in a generic way,
// but I will try target-specific optimization later on.
static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } #ifdef _MSC_VER // This function is needed when debugging on MSVC. 
static INLINE float vcast_f_vf(vfloat v) { float s[8]; _mm256_storeu_ps(s, v); return s[0]; } #endif static INLINE vfloat vload_vf_p(const float *ptr) { return _mm256_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm256_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm256_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm256_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm256_i32gather_ps(ptr, vi2, 4); } // #define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm256_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm256_addsub_ps(x, y); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm256_shuffle_pd(d0, d0, (0 << 3) | (1 << 2) | (0 << 1) | (1 << 0)); } static INLINE 
vdouble vreva2_vd_vd(vdouble d0) { d0 = _mm256_permute2f128_pd(d0, d0, 1); return _mm256_shuffle_pd(d0, d0, (1 << 3) | (0 << 2) | (1 << 1) | (0 << 0)); } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm256_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_store_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); _mm_store_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm256_extractf128_pd(v, 0)); _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm256_extractf128_pd(v, 1)); } // static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm256_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } static INLINE vfloat vreva2_vf_vf(vfloat d0) { d0 = _mm256_permute2f128_ps(d0, d0, 1); return _mm256_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm256_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 0)))); _mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); _mm_storeh_pd((double *)(ptr+(offset + step * 3)*2), _mm_castsi128_pd(_mm_castps_si128(_mm256_extractf128_ps(v, 1)))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } sleef-3.3.1/src/arch/helperavx2_128.h000066400000000000000000000514131333715643700171630ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 #ifndef __AVX2__ #error Please specify -mavx2. #endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP #define LOG2VECTLENDP 1 #define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP #define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP #define FULL_FP_ROUNDING #define SPLIT_KERNEL #define ACCURATE_SQRT #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" typedef __m128i vmask; typedef __m128i vopmask; typedef __m128d vdouble; typedef __m128i vint; typedef __m128 vfloat; typedef __m128i vint2; // #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static int cpuSupportsAVX2() { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); return (reg[1] & (1 << 5)) != 0; } static int cpuSupportsFMA() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 12)) != 0; } #if CONFIG == 1 && defined(__AVX2__) static INLINE int vavailability_i(int name) { int d = cpuSupportsAVX2() && cpuSupportsFMA(); return d ? 
3 : 0; } #define ISANAME "AVX2" #define DFTPRIORITY 25 #endif static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } // static INLINE vdouble vcast_vd_d(double d) { return _mm_set1_pd(d); } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm_castpd_si128(vd); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm_castsi128_pd(vm); } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm_castpd_si128(vd); } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm_castsi128_pd(vi); } // static vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i const *)p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); } static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return vreinterpret_vm_vd(_mm_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vor_vo_vo_vo(vopmask 
x, vopmask y) { return vreinterpret_vm_vd(_mm_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vreinterpret_vm_vd(_mm_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_and_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_andnot_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_or_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vreinterpret_vm_vd(_mm_xor_pd(vreinterpret_vd_vm(x), vreinterpret_vd_vm(y))); } static INLINE vopmask vcast_vo32_vo64(vopmask m) { return _mm_shuffle_epi32(m, 0x08); } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return _mm_shuffle_epi32(m, 0x50); } // static INLINE vint vrint_vi_vd(vdouble vd) { return _mm_cvtpd_epi32(vd); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm_cvttpd_epi32(vd); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static 
INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vcast_vd_vi(vint vi) { return _mm_cvtepi32_pd(vi); } static INLINE vint vcast_vi_i(int i) { return _mm_set1_epi32(i); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm_and_si128(_mm_shuffle_epi32(vi, 0x73), _mm_set_epi32(-1, 0, -1, 0)); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_shuffle_epi32(vi, 0x0d); } static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm_set_epi32(i0, i1, i0, i1); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpeq_epi64(x, y); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm_add_epi64(x, y); } // static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_add_pd(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm_sub_pd(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm_mul_pd(x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm_div_pd(x, y); } static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm_div_pd(_mm_set1_pd(1), x); } static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm_sqrt_pd(x); } static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm_andnot_pd(_mm_set1_pd(-0.0), d); } static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm_xor_pd(_mm_set1_pd(-0.0), d); } static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmadd_pd(x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmsub_pd(x, y, z); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fnmadd_pd(x, y, z); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm_max_pd(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble 
y) { return _mm_min_pd(x, y); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmadd_pd(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmadd_pd(x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fmsub_pd(x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fnmadd_pd(x, y, z); } static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm_fnmsub_pd(x, y, z); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_EQ_OQ)); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_NEQ_UQ)); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_LT_OQ)); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_LE_OQ)); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_GT_OQ)); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return vreinterpret_vm_vd(_mm_cmp_pd(x, y, _CMP_GE_OQ)); } // static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); } static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return _mm_and_si128(x, y); } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { 
return _mm_andnot_si128(x, y); } static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); } static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); } static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, m); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm_blendv_pd(y, x, _mm_castsi128_pd(o)); } static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } static INLINE vopmask visinf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmp_pd(vabs_vd_vd(d), _mm_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmp_pd(d, _mm_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask visminf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmp_pd(d, _mm_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ)); } static INLINE vopmask visnan_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmp_pd(d, d, _CMP_NEQ_UQ)); } static INLINE vdouble vload_vd_p(const double *ptr) { return _mm_load_pd(ptr); } static 
INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm_loadu_pd(ptr); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm_store_pd(ptr, v); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm_storeu_pd(ptr, v); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return _mm_i32gather_pd(ptr, vi, 8); } #if defined(_MSC_VER) // This function is needed when debugging on MSVC. static INLINE double vcast_d_vd(vdouble v) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); return a[0]; } #endif // static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm_cvtps_epi32(vf)); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm_cvttps_epi32(vf)); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm_cvtepi32_ps(vcast_vm_vi2(vi)); } static INLINE vfloat vcast_vf_f(float f) { return _mm_set1_ps(f); } static INLINE vint2 vcast_vi2_i(int i) { return _mm_set1_epi32(i); } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm_castps_si128(vf); } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm_castsi128_ps(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return vreinterpret_vf_vm(vcast_vm_vi2(vi)); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return vcast_vi2_vm(vreinterpret_vm_vf(vf)); } static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_add_ps(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm_sub_ps(x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm_mul_ps(x, y); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm_div_ps(x, y); } static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm_sqrt_ps(x); } static INLINE vfloat vabs_vf_vf(vfloat f) { return 
vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmadd_ps(x, y, z); } static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmsub_ps(x, y, z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fnmadd_ps(x, y, z); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm_max_ps(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm_min_ps(x, y); } static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmadd_ps(x, y, z); } static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmadd_ps(x, y, z); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fmsub_ps(x, y, z); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fnmadd_ps(x, y, z); } static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm_fnmsub_ps(x, y, z); } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_EQ_OQ)); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_NEQ_UQ)); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_LT_OQ)); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_LE_OQ)); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_GT_OQ)); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmp_ps(x, y, _CMP_GE_OQ)); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, 
vint2 y) { return _mm_add_epi32(x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_sub_epi32(x, y); } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_and_si128(x, y); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_andnot_si128(x, y); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_or_si128(x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_xor_si128(x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi2_vi2_vi2(vcast_vi2_vm(x), y); } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi2_vi2_vi2(vcast_vi2_vm(x), y); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return _mm_slli_epi32(x, c); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return _mm_srli_epi32(x, c); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return _mm_srai_epi32(x, c); } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return _mm_blendv_epi8(y, x, m); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return _mm_blendv_ps(y, x, _mm_castsi128_ps(o)); } static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, 
vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } static INLINE vfloat vload_vf_p(const float *ptr) { return _mm_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm_i32gather_ps(ptr, vi2, 4); } #ifdef _MSC_VER // This function is needed when debugging on MSVC. 
static INLINE float vcast_f_vf(vfloat v) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); return a[0]; } #endif // #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_addsub_ps(x, y); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm_shuffle_pd(d0, d0, 1); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd((double *)(&ptr[2*offset]), v); } // static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } static INLINE vfloat vreva2_vf_vf(vfloat d0) 
{ return _mm_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); } sleef-3.3.1/src/arch/helperavx512f.h000066400000000000000000000633451333715643700171140ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #if CONFIG == 1 #ifndef __AVX512F__ #error Please specify -mavx512f. 
#endif #else #error CONFIG macro invalid or not defined #endif #define ENABLE_DP #define LOG2VECTLENDP 3 #define VECTLENDP (1 << LOG2VECTLENDP) #define ENABLE_FMA_DP #define ENABLE_SP #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) #define ENABLE_FMA_SP #define FULL_FP_ROUNDING #define SPLIT_KERNEL #define ACCURATE_SQRT #if defined(_MSC_VER) #include #else #include #endif #include #include "misc.h" typedef __m512i vmask; typedef __mmask16 vopmask; typedef __m512d vdouble; typedef __m256i vint; typedef __m512 vfloat; typedef __m512i vint2; // #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static int cpuSupportsAVX512F() { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); return (reg[1] & (1 << 16)) != 0; } #if CONFIG == 1 && defined(__AVX512F__) static INLINE int vavailability_i(int name) { int d = cpuSupportsAVX512F(); return d ? 3 : 0; } #define ISANAME "AVX512F" #define DFTPRIORITY 30 #endif static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } #ifdef __INTEL_COMPILER static INLINE int vtestallones_i_vo64(vopmask g) { return _mm512_mask2int(g) == 0xff; } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm512_mask2int(g) == 0xffff; } #else static INLINE int vtestallones_i_vo64(vopmask g) { return g == 0xff; } static INLINE int vtestallones_i_vo32(vopmask g) { return g == 0xffff; } #endif // static vint2 vloadu_vi2_p(int32_t *p) { return _mm512_loadu_si512((__m512i const *)p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm512_storeu_si512((__m512i *)p, v); } static vint vloadu_vi_p(int32_t *p) { return _mm256_loadu_si256((__m256i const *)p); } static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm256_storeu_si256((__m256i *)p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return _mm512_and_si512(x, y); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return _mm512_andnot_si512(x, y); } static INLINE vmask 
vor_vm_vm_vm(vmask x, vmask y) { return _mm512_or_si512(x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return _mm512_xor_si512(x, y); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kand(x, y); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kandn(x, y); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kor(x, y); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return _mm512_kxor(x, y); } static INLINE vmask vand_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_and_epi64(_mm512_set1_epi32(0), o, m, m); } static INLINE vmask vandnot_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_and_epi64(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0)); } static INLINE vmask vor_vm_vo64_vm(vopmask o, vmask m) { return _mm512_mask_or_epi64(m, o, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); } static INLINE vmask vand_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_and_epi32(_mm512_set1_epi32(0), o, m, m); } static INLINE vmask vandnot_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_and_epi32(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0)); } static INLINE vmask vor_vm_vo32_vm(vopmask o, vmask m) { return _mm512_mask_or_epi32(m, o, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1)); } static INLINE vopmask vcast_vo32_vo64(vopmask o) { return o; } static INLINE vopmask vcast_vo64_vo32(vopmask o) { return o; } // static INLINE vint vrint_vi_vd(vdouble vd) { return _mm512_cvt_roundpd_epi32(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm512_cvt_roundpd_epi32(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vcast_vd_vi(vint vi) { return _mm512_cvtepi32_pd(vi); } static INLINE vint vcast_vi_i(int i) { return _mm256_set1_epi32(i); } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { __m256d hi = _mm512_extractf64x4_pd(vd, 1), lo = _mm512_extractf64x4_pd(vd, 0); hi = 
_mm256_round_pd(hi, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); lo = _mm256_round_pd(lo, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); return _mm512_insertf64x4(_mm512_castpd256_pd512(lo), hi, 1); } static INLINE vdouble vrint_vd_vd(vdouble vd) { __m256d hi = _mm512_extractf64x4_pd(vd, 1), lo = _mm512_extractf64x4_pd(vd, 0); hi = _mm256_round_pd(hi, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); lo = _mm256_round_pd(lo, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); return _mm512_insertf64x4(_mm512_castpd256_pd512(lo), hi, 1); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm512_maskz_permutexvar_epi32(0xaaaa, _mm512_set_epi32(7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0), _mm512_castsi256_si512(vi)); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm512_castsi512_si256(_mm512_maskz_permutexvar_epi32(0x00ff, _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 15, 13, 11, 9, 7, 5, 3, 1), vi)); } static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm512_set_epi32(i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1, i0, i1); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm512_cmp_epi64_mask(x, y, _MM_CMPINT_EQ); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm512_add_epi64(x, y); } // static INLINE vdouble vcast_vd_d(double d) { return _mm512_set1_pd(d); } static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return _mm512_castpd_si512(vd); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm512_castsi512_pd(vm); } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm512_castpd_si512(vd); } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm512_castsi512_pd(vi); } static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm512_add_pd(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm512_sub_pd(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm512_mul_pd(x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) 
{ return _mm512_div_pd(x, y); } static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm512_div_pd(_mm512_set1_pd(1), x); } static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm512_sqrt_pd(x); } static INLINE vdouble vabs_vd_vd(vdouble d) { return vreinterpret_vd_vm(_mm512_andnot_si512(vreinterpret_vm_vd(_mm512_set1_pd(-0.0)), vreinterpret_vm_vd(d))); } static INLINE vdouble vneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(_mm512_xor_si512(vreinterpret_vm_vd(_mm512_set1_pd(-0.0)), vreinterpret_vm_vd(d))); } static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmsub_pd(x, y, z); } static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmadd_pd(x, y, z); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm512_max_pd(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm512_min_pd(x, y); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmadd_pd(x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmsub_pd(x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmadd_pd(x, y, z); } static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fnmsub_pd(x, y, z); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_EQ_OQ); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_NEQ_UQ); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_LT_OQ); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, 
_CMP_LE_OQ); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_GT_OQ); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return _mm512_cmp_pd_mask(x, y, _CMP_GE_OQ); } // static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm256_add_epi32(x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm256_sub_epi32(x, y); } static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm256_and_si256(x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm256_andnot_si256(x, y); } static INLINE vint vandnot_vi_vo_vi(vopmask o, vint y) { return _mm512_castsi512_si256(_mm512_mask_and_epi32(_mm512_castsi256_si512(y), o, _mm512_set1_epi32(0), _mm512_set1_epi32(0))); } static INLINE vint vand_vi_vo_vi(vopmask o, vint y) { return _mm512_castsi512_si256(_mm512_mask_and_epi32(_mm512_set1_epi32(0), o, _mm512_castsi256_si512(y), _mm512_castsi256_si512(y))); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm256_or_si256(x, y); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm256_xor_si256(x, y); } #define vsll_vi_vi_i(x, c) _mm256_slli_epi32(x, c) #define vsrl_vi_vi_i(x, c) _mm256_srli_epi32(x, c) #define vsra_vi_vi_i(x, c) _mm256_srai_epi32(x, c) static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm256_cmpeq_epi32(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm256_cmpgt_epi32(x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm512_cmp_epi32_mask(_mm512_castsi256_si512(x), _mm512_castsi256_si512(y), _MM_CMPINT_EQ); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm512_cmp_epi32_mask(_mm512_castsi256_si512(y), _mm512_castsi256_si512(x), _MM_CMPINT_LT); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) { return _mm512_mask_blend_pd(mask, y, x); } static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask 
o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } #if 1 // Probably this is faster static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { __m512i v = _mm512_castpd_si512(vsel_vd_vo_vd_vd(o0, _mm512_castsi512_pd(_mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0)), vsel_vd_vo_vd_vd(o1, _mm512_castsi512_pd(_mm512_set_epi64(1, 1, 1, 1, 1, 1, 1, 1)), vsel_vd_vo_vd_vd(o2, _mm512_castsi512_pd(_mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2)), _mm512_castsi512_pd(_mm512_set_epi64(3, 3, 3, 3, 3, 3, 3, 3)))))); return _mm512_permutexvar_pd(v, _mm512_castpd256_pd512(_mm256_set_pd(d3, d2, d1, d0))); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2); } #else static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } #endif static INLINE vopmask visinf_vo_vd(vdouble d) { return _mm512_cmp_pd_mask(vabs_vd_vd(d), _mm512_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return _mm512_cmp_pd_mask(d, _mm512_set1_pd(SLEEF_INFINITY), _CMP_EQ_OQ); } static INLINE vopmask visminf_vo_vd(vdouble d) { return _mm512_cmp_pd_mask(d, _mm512_set1_pd(-SLEEF_INFINITY), _CMP_EQ_OQ); } static INLINE vopmask visnan_vo_vd(vdouble d) { return _mm512_cmp_pd_mask(d, d, _CMP_NEQ_UQ); } static INLINE vint vilogbk_vi_vd(vdouble d) { return vrint_vi_vd(_mm512_getexp_pd(d)); } // vilogb2k_vi_vd is similar to vilogbk_vi_vd, but the argument has to // be a normalized FP value. 
// On this target the body is the same as vilogbk_vi_vd: getexp followed by
// conversion to integer.
static INLINE vint vilogb2k_vi_vd(vdouble d) { return vrint_vi_vd(_mm512_getexp_pd(d)); }

// Thin wrappers over the AVX-512 getexp / getmant instructions. The mantissa
// is requested in the _MM_MANT_NORM_p75_1p5 interval with the
// _MM_MANT_SIGN_nan sign control.
static INLINE vdouble vgetexp_vd_vd(vdouble d) { return _mm512_getexp_pd(d); }
static INLINE vfloat vgetexp_vf_vf(vfloat d) { return _mm512_getexp_ps(d); }

static INLINE vdouble vgetmant_vd_vd(vdouble d) { return _mm512_getmant_pd(d, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan); }
static INLINE vfloat vgetmant_vf_vf(vfloat d) { return _mm512_getmant_ps(d, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_nan); }

// Macros rather than functions because the last argument is forwarded as the
// immediate operand of the fixupimm intrinsic.
#define vfixup_vd_vd_vd_vi2_i(a, b, c, imm) _mm512_fixupimm_pd((a), (b), (c), (imm))
#define vfixup_vf_vf_vf_vi2_i(a, b, c, imm) _mm512_fixupimm_ps((a), (b), (c), (imm))

#if defined(_MSC_VER)
// This function is needed when debugging on MSVC.
// It spills the vector to a local array and returns element 0.
static INLINE double vcast_d_vd(vdouble v) {
  double s[VECTLENDP];
  _mm512_storeu_pd(s, v);
  return s[0];
}
#endif

// Double-precision memory access. The non-"u" variants use the aligned
// load/store intrinsics, the "u" variants the unaligned ones.
static INLINE vdouble vload_vd_p(const double *ptr) { return _mm512_load_pd(ptr); }
static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm512_loadu_pd(ptr); }

static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm512_store_pd(ptr, v); }
static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm512_storeu_pd(ptr, v); }

// Gathers ptr[vi[k]] for every lane; the scale of 8 is sizeof(double).
static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return _mm512_i32gather_pd(vi, ptr, 8); }

//

// Per-lane select on vint: x where the mask bit is set, y otherwise. The
// operands are widened to 512 bits for the masked blend and narrowed back.
static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) {
  return _mm512_castsi512_si256(_mm512_mask_blend_epi32(m, _mm512_castsi256_si512(y), _mm512_castsi256_si512(x)));
}

//

// Bit-pattern reinterpretations between the 512-bit vector types (no value
// conversion takes place).
static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm512_castps_si512(vf); }
static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm512_castsi512_ps(vm); }
static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return _mm512_castsi512_ps(vi); }
static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return _mm512_castps_si512(vf); }

static INLINE vdouble vreinterpret_vd_vf(vfloat vf) { return _mm512_castps_pd(vf); }
static INLINE vfloat vreinterpret_vf_vd(vdouble vd) { return _mm512_castpd_ps(vd);
}

// vint2 and vmask are returned unchanged by these two casts.
static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; }
static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; }

// Value conversions and broadcasts.
static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm512_cvtepi32_ps(vcast_vm_vi2(vi)); }
static INLINE vfloat vcast_vf_f(float f) { return _mm512_set1_ps(f); }
static INLINE vint2 vcast_vi2_i(int i) { return _mm512_set1_epi32(i); }
// Float -> int32: vrint uses the rounding conversion (cvtps), vtruncate the
// truncating one (cvttps).
static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm512_cvtps_epi32(vf)); }
static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcast_vi2_vm(_mm512_cvttps_epi32(vf)); }

// Round toward zero, result kept as float. The 512-bit vector is split into
// its two 256-bit halves, each half is rounded with _mm256_round_ps, and the
// halves are recombined.
static INLINE vfloat vtruncate_vf_vf(vfloat vd) {
  __m256 hi = _mm256_castpd_ps(_mm512_extractf64x4_pd(vreinterpret_vd_vf(vd), 1));
  __m256 lo = _mm256_castpd_ps(_mm512_extractf64x4_pd(vreinterpret_vd_vf(vd), 0));
  hi = _mm256_round_ps(hi, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC);
  lo = _mm256_round_ps(lo, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC);
  return vreinterpret_vf_vd(_mm512_insertf64x4(_mm512_castpd256_pd512(_mm256_castps_pd(lo)), _mm256_castps_pd(hi), 1));
}

// Round to nearest integer, result kept as float. Same split / round /
// recombine scheme as vtruncate_vf_vf, with _MM_FROUND_TO_NEAREST_INT.
static INLINE vfloat vrint_vf_vf(vfloat vd) {
  __m256 hi = _mm256_castpd_ps(_mm512_extractf64x4_pd(vreinterpret_vd_vf(vd), 1));
  __m256 lo = _mm256_castpd_ps(_mm512_extractf64x4_pd(vreinterpret_vd_vf(vd), 0));
  hi = _mm256_round_ps(hi, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC);
  lo = _mm256_round_ps(lo, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC);
  return vreinterpret_vf_vd(_mm512_insertf64x4(_mm512_castpd256_pd512(_mm256_castps_pd(lo)), _mm256_castps_pd(hi), 1));
}

// Single-precision arithmetic.
static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm512_add_ps(x, y); }
static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm512_sub_ps(x, y); }
static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm512_mul_ps(x, y); }
static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm512_div_ps(x, y); }
// Reciprocal is a full division 1.0f / x, not an approximation instruction.
static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); }
static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm512_sqrt_ps(x); }
static
INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); }
// vabs above clears the bits that are set in -0.0f (the sign bit); vneg
// flips them with an XOR.
static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); }

// Multiply-add family, mapped onto the fused intrinsics:
//   vmla   ->  fmadd  :   x*y + z
//   vmlapn ->  fmsub  :   x*y - z
//   vmlanp ->  fnmadd : -(x*y) + z
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); }
static INLINE vfloat vmlapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmsub_ps(x, y, z); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmadd_ps(x, y, z); }
static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm512_max_ps(x, y); }
static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm512_min_ps(x, y); }

// Explicit FMA family. The two-letter suffix gives the signs of the product
// and of the addend: pp = x*y + z, pn = x*y - z, np = -(x*y) + z,
// nn = -(x*y) - z. vfma is the same operation as vfmapp.
static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); }
static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmadd_ps(x, y, z); }
static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmsub_ps(x, y, z); }
static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmadd_ps(x, y, z); }
static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fnmsub_ps(x, y, z); }

// Single-precision comparisons returning a k-mask. All use ordered, quiet
// predicates except "not equal", which uses the unordered one (_CMP_NEQ_UQ).
static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_EQ_OQ); }
static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_NEQ_UQ); }
static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_LT_OQ); }
static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_LE_OQ); }
static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_GT_OQ); }
static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return _mm512_cmp_ps_mask(x, y, _CMP_GE_OQ); }

static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x,
vint2 y) { return _mm512_add_epi32(x, y); }
static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_sub_epi32(x, y); }
// Negation is computed as 0 - e.
static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); }

// Bitwise logic on the full 512-bit integer vector.
static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_and_si512(x, y); }
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_andnot_si512(x, y); }
static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_or_si512(x, y); }
static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm512_xor_si512(x, y); }

// Masked forms. _mm512_mask_and_epi32(src, k, a, b) yields a & b where the
// mask bit is set and src elsewhere, so:
//   vand_vi2_vo_vi2   : m in lanes selected by o, 0 in the others.
//   vandnot_vi2_vo_vi2: 0 in lanes selected by o, m in the others.
static INLINE vint2 vand_vi2_vo_vi2(vopmask o, vint2 m) {
  return _mm512_mask_and_epi32(_mm512_set1_epi32(0), o, m, m);
}

static INLINE vint2 vandnot_vi2_vo_vi2(vopmask o, vint2 m) {
  return _mm512_mask_and_epi32(m, o, _mm512_set1_epi32(0), _mm512_set1_epi32(0));
}

// Shifts by an immediate count: logical left, logical right, arithmetic right.
#define vsll_vi2_vi2_i(x, c) _mm512_slli_epi32(x, c)
#define vsrl_vi2_vi2_i(x, c) _mm512_srli_epi32(x, c)
#define vsra_vi2_vi2_i(x, c) _mm512_srai_epi32(x, c)
// Comparisons returning a k-mask.
static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpeq_epi32_mask(x, y); }
static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm512_cmpgt_epi32_mask(x, y); }

// Comparisons returning a vector: the k-mask is expanded to -1 (all bits set)
// in lanes where the comparison holds and 0 elsewhere. "x > y" is evaluated
// as "y < x".
static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) {
  __mmask16 m = _mm512_cmp_epi32_mask(x, y, _MM_CMPINT_EQ);
  return _mm512_mask_and_epi32(_mm512_set1_epi32(0), m, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1));
}
static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) {
  __mmask16 m = _mm512_cmp_epi32_mask(y, x, _MM_CMPINT_LT);
  return _mm512_mask_and_epi32(_mm512_set1_epi32(0), m, _mm512_set1_epi32(-1), _mm512_set1_epi32(-1));
}

// Per-lane select: x where the mask bit is set, y otherwise (operands are
// passed to the blend intrinsic in swapped order).
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) {
  return _mm512_mask_blend_epi32(m, y, x);
}

static INLINE vfloat vsel_vf_vo_vf_vf(vopmask m, vfloat x, vfloat y) {
  return _mm512_mask_blend_ps(m, y, x);
}

// At this point, the following three functions are implemented in a generic way,
// but I will try target-specific optimization later on.
static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) {
  return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0));
}

static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2));
}

static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3)));
}

// Classification predicates built from the comparison helpers: visinf tests
// |d| against +infinity, vispinf / visminf test d against +/- infinity, and
// visnan tests d != d.
static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); }
static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); }
static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); }
static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); }

// Exponent of d as an integer vector; both variants have the same body on
// this target.
static INLINE vint2 vilogbk_vi2_vf(vfloat d) { return vrint_vi2_vf(_mm512_getexp_ps(d)); }
static INLINE vint2 vilogb2k_vi2_vf(vfloat d) { return vrint_vi2_vf(_mm512_getexp_ps(d)); }

#ifdef _MSC_VER
// This function is needed when debugging on MSVC.
static INLINE float vcast_f_vf(vfloat v) {
  // Spill the vector to a local array and return element 0.
  float s[VECTLENSP];
  _mm512_storeu_ps(s, v);
  return s[0];
}
#endif

// Single-precision memory access. The non-"u" variants use the aligned
// load/store intrinsics, the "u" variants the unaligned ones.
static INLINE vfloat vload_vf_p(const float *ptr) { return _mm512_load_ps(ptr); }
static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm512_loadu_ps(ptr); }

static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm512_store_ps(ptr, v); }
static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm512_storeu_ps(ptr, v); }

// Gathers ptr[vi2[k]] for every lane; the scale of 4 is sizeof(float).
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return _mm512_i32gather_ps(vi2, ptr, 4); }

//

// Alternating sign flips, done as a masked XOR with the bit pattern of -0.0
// over 32-bit lanes. The 16-bit immediate selects which 32-bit lanes are
// XOR'ed; unselected lanes pass d through.
//   doubles: 0xcccc selects 32-bit lanes 2,3 of every group of four, i.e. the
//            odd-indexed doubles (vposneg); 0x3333 selects the even-indexed
//            ones (vnegpos).
//   floats : 0xaaaa selects odd-indexed floats (vposneg); 0x5555 selects
//            even-indexed floats (vnegpos).
static INLINE vdouble vposneg_vd_vd(vdouble d) {
  return vreinterpret_vd_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vd(d), 0xcccc, vreinterpret_vm_vd(d), vreinterpret_vm_vd(_mm512_set1_pd(-0.0))));
}
static INLINE vdouble vnegpos_vd_vd(vdouble d) {
  return vreinterpret_vd_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vd(d), 0x3333, vreinterpret_vm_vd(d), vreinterpret_vm_vd(_mm512_set1_pd(-0.0))));
}
static INLINE vfloat vposneg_vf_vf(vfloat d) {
  return vreinterpret_vf_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vf(d), 0xaaaa, vreinterpret_vm_vf(d), vreinterpret_vm_vf(_mm512_set1_ps(-0.0f))));
}
static INLINE vfloat vnegpos_vf_vf(vfloat d) {
  return vreinterpret_vf_vm(_mm512_mask_xor_epi32(vreinterpret_vm_vf(d), 0x5555, vreinterpret_vm_vf(d), vreinterpret_vm_vf(_mm512_set1_ps(-0.0f))));
}

// x + negpos(y): subtracts y in even lanes and adds it in odd lanes.
static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); }
static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); }
// Fused form of the above applied to the product x*y, via the fmaddsub
// intrinsics.
static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return _mm512_fmaddsub_pd(x, y, z); }
static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return _mm512_fmaddsub_ps(x, y, z); }

// Swaps the two doubles inside every 128-bit pair (permute control 0x55).
static INLINE vdouble vrev21_vd_vd(vdouble vd) { return _mm512_permute_pd(vd, 0x55); }

// Reverses the order of the four 128-bit (two-double) groups while keeping
// the order inside each group; expressed as a permutation of 32-bit lanes.
static INLINE vdouble vreva2_vd_vd(vdouble vd) {
  return vreinterpret_vd_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(3, 2,
1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12), vreinterpret_vm_vd(vd)));
}

// Non-temporal (streaming) store of a whole vector.
static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm512_stream_pd(ptr, v); }

// Writes the four 128-bit (two-double) groups of v to
// ptr[(offset + step * k) * 2], k = 0..3. _mm_store_pd is the aligned store,
// so each destination is expected to be 16-byte aligned.
static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) {
  _mm_store_pd(&ptr[(offset + step * 0)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 0)));
  _mm_store_pd(&ptr[(offset + step * 1)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 1)));
  _mm_store_pd(&ptr[(offset + step * 2)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 2)));
  _mm_store_pd(&ptr[(offset + step * 3)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 3)));
}

// Same addressing as vscatter2_v_p_i_i_vd, using non-temporal stores.
static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) {
  _mm_stream_pd(&ptr[(offset + step * 0)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 0)));
  _mm_stream_pd(&ptr[(offset + step * 1)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 1)));
  _mm_stream_pd(&ptr[(offset + step * 2)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 2)));
  _mm_stream_pd(&ptr[(offset + step * 3)*2], _mm_castps_pd(_mm512_extractf32x4_ps(vreinterpret_vf_vd(v), 3)));
}

//

// Swaps the two floats inside every 64-bit pair (permute control 0xb1).
static INLINE vfloat vrev21_vf_vf(vfloat vf) { return _mm512_permute_ps(vf, 0xb1); }

// Reverses the order of the eight 64-bit (two-float) pairs while keeping the
// order inside each pair.
static INLINE vfloat vreva2_vf_vf(vfloat vf) {
  return vreinterpret_vf_vm(_mm512_permutexvar_epi32(_mm512_set_epi32(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14), vreinterpret_vm_vf(vf)));
}

// Non-temporal (streaming) store of a whole vector.
static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm512_stream_ps(ptr, v); }

// Writes the eight two-float pairs of v to ptr + (offset + step * k) * 2,
// k = 0..7. Each 128-bit quarter of v supplies two pairs: the low 64 bits via
// _mm_storel_pd and the high 64 bits via _mm_storeh_pd.
static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
  _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 0)));
  _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 0)));
  _mm_storel_pd((double *)(ptr+(offset + step * 2)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 1)));
  _mm_storeh_pd((double
*)(ptr+(offset + step * 3)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 1)));
  _mm_storel_pd((double *)(ptr+(offset + step * 4)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 2)));
  _mm_storeh_pd((double *)(ptr+(offset + step * 5)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 2)));
  _mm_storel_pd((double *)(ptr+(offset + step * 6)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 3)));
  _mm_storeh_pd((double *)(ptr+(offset + step * 7)*2), _mm_castps_pd(_mm512_extractf32x4_ps(v, 3)));
}

// The "streaming" scatter has no dedicated implementation for floats on this
// target; it forwards to the ordinary scatter.
static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); }
sleef-3.3.1/src/arch/helperneon32.h000066400000000000000000000252201333715643700170120ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Build guards: NEON must be enabled, and a warning is issued when this
// 32-bit helper is compiled for AArch64.
#ifndef __ARM_NEON
#error Please specify -mfpu=neon.
#endif

#ifdef __aarch64__
#warning This implementation is for AARCH32.
#endif #define ENABLE_SP #define LOG2VECTLENSP 2 #define VECTLENSP (1 << LOG2VECTLENSP) #define ISANAME "AARCH32 NEON" #define DFTPRIORITY 10 #define ENABLE_RECSQRT_SP #include #include #include "misc.h" typedef uint32x4_t vmask; typedef uint32x4_t vopmask; //typedef int32x4_t vint; typedef float32x4_t vfloat; typedef int32x4_t vint2; // static INLINE void vprefetch_v_p(const void *ptr) { } static INLINE int vtestallones_i_vo32(vopmask g) { uint32x2_t x0 = vand_u32(vget_low_u32(g), vget_high_u32(g)); uint32x2_t x1 = vpmin_u32(x0, x0); return vget_lane_u32(x1, 0); } static vint2 vloadu_vi2_p(int32_t *p) { return vld1q_s32(p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { vst1q_s32(p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vorrq_u32(x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return veorq_u32(x, y); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vandq_u32(x, y); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vbicq_u32(y, x); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vorrq_u32(x, y); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return veorq_u32(x, y); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vorrq_u32(x, y); } static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return veorq_u32(x, y); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vandq_u32(x, y); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vbicq_u32(y, x); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vorrq_u32(x, y); } static INLINE vmask 
vxor_vm_vo32_vm(vopmask x, vmask y) { return veorq_u32(x, y); } static INLINE vopmask vcast_vo32_vo64(vopmask m) { return vuzpq_u32(m, m).val[0]; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return vzipq_u32(m, m).val[0]; } // static INLINE vmask vcast_vm_i_i(int i0, int i1) { return (vmask)vdupq_n_u64((uint64_t)i0 | (((uint64_t)i1) << 32)); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { uint32x4_t t = vceqq_u32(x, y); return vandq_u32(t, vrev64q_u32(t)); } // static INLINE vint2 vcast_vi2_vm(vmask vm) { return (vint2)vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return (vmask)vi; } static INLINE vint2 vrint_vi2_vf(vfloat d) { return vcvtq_s32_f32(vaddq_f32(d, (float32x4_t)vorrq_u32(vandq_u32((uint32x4_t)d, (uint32x4_t)vdupq_n_f32(-0.0f)), (uint32x4_t)vdupq_n_f32(0.5f)))); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vcvtq_s32_f32(vf); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return vcvtq_f32_s32(vi); } static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return vcast_vf_vi2(vrint_vi2_vf(vd)); } static INLINE vfloat vcast_vf_f(float f) { return vdupq_n_f32(f); } static INLINE vint2 vcast_vi2_i(int i) { return vdupq_n_s32(i); } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (vmask)vf; } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (vfloat)vm; } static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return (vfloat)vm; } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return (vint2)vf; } static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return vaddq_f32(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return vsubq_f32(x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return vmulq_f32(x, y); } static INLINE vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) { float32x4_t x = vrecpeq_f32(d); x = vmulq_f32(x, vrecpsq_f32(d, x)); float32x4_t t = vmulq_f32(n, x); return 
vmlsq_f32(vaddq_f32(t, t), vmulq_f32(t, x), d);
}

// Reciprocal 1 / d: reciprocal estimate, one vrecps refinement step, then a
// final correction computed as 2x - x*x*d.
static INLINE vfloat vrec_vf_vf(vfloat d) {
  float32x4_t x = vrecpeq_f32(d);
  x = vmulq_f32(x, vrecpsq_f32(d, x));
  return vmlsq_f32(vaddq_f32(x, x), vmulq_f32(x, x), d);
}

// Square root built from the reciprocal-square-root estimate: x ~ 1/sqrt(d)
// is refined once, u = x * d approximates sqrt(d), and u is then corrected
// using the residual d - u*u. The final vbicq clears the result in lanes
// where d == 0.0.
static INLINE vfloat vsqrt_vf_vf(vfloat d) {
  float32x4_t x = vrsqrteq_f32(d);
  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x)));
  float32x4_t u = vmulq_f32(x, d);
  u = vmlaq_f32(u, vmlsq_f32(d, u, u), vmulq_f32(x, vdupq_n_f32(0.5)));
  return (float32x4_t)vbicq_u32((uint32x4_t)u, vceqq_f32(d, vdupq_n_f32(0.0f)));
}

// Reciprocal square root: estimate, one vrsqrts refinement step, then a
// correction using the residual 1 - x*x*d.
static INLINE vfloat vrecsqrt_vf_vf(vfloat d) {
  float32x4_t x = vrsqrteq_f32(d);
  x = vmulq_f32(x, vrsqrtsq_f32(d, vmulq_f32(x, x)));
  return vmlaq_f32(x, vmlsq_f32(vdupq_n_f32(1), x, vmulq_f32(x, d)), vmulq_f32(x, vdupq_n_f32(0.5)));
}

static INLINE vfloat vabs_vf_vf(vfloat f) { return vabsq_f32(f); }
static INLINE vfloat vneg_vf_vf(vfloat f) { return vnegq_f32(f); }
// vmla computes z + x*y and vmlanp computes z - x*y; the NEON intrinsics take
// the accumulator as their first argument.
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmlaq_f32(z, x, y); }
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmlsq_f32(z, x, y); }
static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return vmaxq_f32(x, y); }
static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vminq_f32(x, y); }

// Comparisons returning a per-lane mask. "Not equal" is the bitwise
// complement of "equal".
static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vceqq_f32(x, y); }
static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vmvnq_u32(vceqq_f32(x, y)); }
static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vcltq_f32(x, y); }
static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vcleq_f32(x, y); }
static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vcgtq_f32(x, y); }
static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vcgeq_f32(x, y); }

// 32-bit integer arithmetic.
static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vaddq_s32(x, y); }
static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsubq_s32(x, y); }
static INLINE vint2 vneg_vi2_vi2(vint2 e) { return
vnegq_s32(e); }

// Bitwise logic on int32 vectors. "andnot" computes y & ~x; vbicq(a, b) is
// a & ~b, hence the swapped operand order.
static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vandq_s32(x, y); }
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vbicq_s32(y, x); }
static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vorrq_s32(x, y); }
static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return veorq_s32(x, y); }

// Same operations with a comparison mask as the first operand.
static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vandq_u32(x, (vopmask)y); }
static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vbicq_u32((vopmask)y, x); }

// Shifts by an immediate count. The logical right shift is performed on the
// unsigned reinterpretation so that zeros are shifted in.
#define vsll_vi2_vi2_i(x, c) vshlq_n_s32(x, c)
#define vsrl_vi2_vi2_i(x, c) vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(x), c))

#define vsra_vi2_vi2_i(x, c) vshrq_n_s32(x, c)
// Integer comparisons, returned either as a mask (vo) or as an integer
// vector carrying the same bits (vi2).
static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return vceqq_s32(x, y); }
static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return vcgtq_s32(x, y); }
static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vceqq_s32(x, y); }
static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return (vint2)vcgtq_s32(x, y); }

// Bitwise select: bits of x where the mask is set, bits of y elsewhere.
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return (vint2)vbslq_u32(m, (vmask)x, (vmask)y); }

static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) {
  return (vfloat)vbslq_u32(mask, (vmask)x, (vmask)y);
}

// Selects between broadcast scalar constants; the multi-mask variants chain
// two-way selects, so the first matching mask wins.
static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) {
  return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0));
}

static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2));
}

static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3)));
}

// True in lanes where |d| equals +infinity.
static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); }
static
INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); }
static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); }
// NaN test: d != d.
static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); }

// Returns lane 0 of v as a scalar. vavailability_i() below depends on it, so
// it is defined unconditionally in this header.
static INLINE float vcast_f_vf(vfloat v) {
  float p[4];
  vst1q_f32 (p, v);
  return p[0];
}

// Availability probe: returns 0 for any name other than 2. For name == 2 it
// executes a NEON add and reports whether the result is non-zero.
static INLINE int vavailability_i(int name) {
  if (name != 2) return 0;
  return vcast_f_vf(vadd_vf_vf_vf(vcast_vf_f(name), vcast_vf_f(name))) != 0.0;
}


// Memory access. The non-"u" variants tell the compiler that ptr is 16-byte
// aligned; the "u" variants make no such promise.
static INLINE vfloat vload_vf_p(const float *ptr) { return vld1q_f32(__builtin_assume_aligned(ptr, 16)); }
static INLINE vfloat vloadu_vf_p(const float *ptr) { return vld1q_f32(ptr); }

static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vst1q_f32(__builtin_assume_aligned(ptr, 16), v); }
static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { vst1q_f32(ptr, v); }

// Gather implemented with four scalar loads, one per index lane.
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) {
  return ((vfloat) {
      ptr[vgetq_lane_s32(vi2, 0)],
      ptr[vgetq_lane_s32(vi2, 1)],
      ptr[vgetq_lane_s32(vi2, 2)],
      ptr[vgetq_lane_s32(vi2, 3)]
    });
}

// Sign-flip patterns: PNMASKf has the sign bit set in odd lanes, NPMASKf in
// even lanes. XOR-ing with them negates the corresponding lanes.
#define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f })
#define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f })

static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)PNMASKf); }
static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)vxor_vm_vm_vm((vmask)d, (vmask)NPMASKf); }

// d0 + negpos(d1): subtracts d1 in even lanes and adds it in odd lanes.
static INLINE vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); }
// Same pattern applied to the (unfused) product x * y.
static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); }

// vrev21 swaps the two floats inside each 64-bit pair; vreva2 swaps the low
// and high 64-bit halves of the vector.
static INLINE vfloat vrev21_vf_vf(vfloat d0) { return vrev64q_f32(d0); }
static INLINE vfloat vreva2_vf_vf(vfloat d0) { return vcombine_f32(vget_high_f32(d0), vget_low_f32(d0)); }

// No non-temporal store is used on this target; this forwards to the aligned
// store.
static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); }

static INLINE void
vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
  // The low two floats go to ptr + offset*2, the high two floats to
  // ptr + (offset + step)*2.
  vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v));
  vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v));
}

// Same body as vscatter2_v_p_i_i_vf: no separate streaming path here.
static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
  vst1_f32((float *)(ptr+(offset + step * 0)*2), vget_low_f32(v));
  vst1_f32((float *)(ptr+(offset + step * 1)*2), vget_high_f32(v));
}
sleef-3.3.1/src/arch/helperpower_128.h000066400000000000000000000462511333715643700174430ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Build guards: CONFIG must be 1, and VSX must be enabled in the compiler.
#if CONFIG == 1

#ifndef __VSX__
#error Please specify -mvsx.
#endif

#else
#error CONFIG macro invalid or not defined
#endif

// Double precision: 2 lanes per vector, FMA available.
#define ENABLE_DP
#define LOG2VECTLENDP 1
#define VECTLENDP (1 << LOG2VECTLENDP)
#define ENABLE_FMA_DP

// Single precision: twice as many lanes as double precision (4).
#define ENABLE_SP
#define LOG2VECTLENSP (LOG2VECTLENDP+1)
#define VECTLENSP (1 << LOG2VECTLENSP)
#define ENABLE_FMA_SP

#define ACCURATE_SQRT
#define FULL_FP_ROUNDING
//#define SPLIT_KERNEL // Benchmark comparison is needed to determine whether this option should be enabled.
#include #include #include "misc.h" typedef vector unsigned int vmask; typedef vector unsigned int vopmask; typedef vector double vdouble; typedef vector int vint; typedef vector float vfloat; typedef vector int vint2; // static INLINE int vavailability_i(int name) { return 3; } #define ISANAME "VSX" #define DFTPRIORITY 25 static INLINE void vprefetch_v_p(const void *ptr) { } static vint2 vloadu_vi2_p(int32_t *p) { return vec_ld(0, p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { vec_st(v, 0, p); } static vint vloadu_vi_p(int32_t *p) { return vec_ld(0, p); } static void vstoreu_v_p_vi(int32_t *p, vint v) { vec_st(v, 0, p); } static INLINE vdouble vload_vd_p(const double *ptr) { return (vector double)vec_ld(0, (const int *)ptr); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { vec_st((vector int)v, 0, (int *)ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return (vector double) ( ptr[0], ptr[1] ); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { ptr[0] = v[0]; ptr[1] = v[1]; } static INLINE vfloat vload_vf_p(const float *ptr) { return (vector float)vec_ld(0, (const int *)ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vec_st((vector int)v, 0, (int *)ptr); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { *(ptr+(offset + step * 0)*2 + 0) = v[0]; *(ptr+(offset + step * 0)*2 + 1) = v[1]; *(ptr+(offset + step * 1)*2 + 0) = v[2]; *(ptr+(offset + step * 1)*2 + 1) = v[3]; } static INLINE vfloat vloadu_vf_p(const float *ptr) { return (vfloat) ( ptr[0], ptr[1], ptr[2], ptr[3] ); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { ptr[0] = v[0]; ptr[1] = v[1]; ptr[2] = v[2]; ptr[3] = v[3]; } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { int a[VECTLENDP]; vstoreu_v_p_vi(a, vi); return ((vdouble) { ptr[a[0]], 
ptr[a[1]] });
}

// Gather of four floats: the index vector is spilled to a local array and the
// elements are loaded one by one.
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) {
  int a[VECTLENSP];
  vstoreu_v_p_vi2(a, vi2);
  return ((vfloat) { ptr[a[0]], ptr[a[1]], ptr[a[2]], ptr[a[3]] });
}

// Broadcasts. vcast_vi_i initializes only the first two elements with i; the
// remaining two are zero-initialized by the compound literal.
static INLINE vint vcast_vi_i(int i) { return (vint) { i, i }; }
static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i }; }
static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f }; }
static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d }; }

// Integer -> floating-point value conversions.
static INLINE vdouble vcast_vd_vi(vint vi) { return vec_doubleh(vi); }
static INLINE vfloat vcast_vf_vi2(vint2 vi) { return vec_float(vi); }
// Double -> int with rounding. After the conversion, vec_perm copies bytes
// 0-3 and 8-11 into the first two 32-bit words of the result.
// NOTE(review): the converted integer vector is stored back into the vdouble
// parameter before the permute; this relies on the compiler accepting that
// vector-type assignment - confirm with the target toolchain.
static INLINE vint vrint_vi_vd(vdouble vd) {
  vd = vec_signed(vec_round(vd));
  return vec_perm(vd, vd, (vector unsigned char)(0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15));
}
static INLINE vint2 vrint_vi2_vf(vfloat vf) { return vec_signed(vec_round(vf)); }
// Double -> int without a prior rounding step; same byte selection as
// vrint_vi_vd.
static INLINE vint vtruncate_vi_vd(vdouble vd) {
  return vec_perm(vec_signed(vd), vec_signed(vd), (vector unsigned char)(0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15));
}
static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return vec_signed(vf); }

// Floating-point rounding that keeps the floating-point type.
static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vec_trunc(vd); }
static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return vec_trunc(vf); }
static INLINE vdouble vrint_vd_vd(vdouble vd) { return vec_round(vd); }
static INLINE vfloat vrint_vf_vf(vfloat vf) { return vec_round(vf); }

// Bit-pattern reinterpretations via vector casts (no value conversion).
static INLINE vmask vreinterpret_vm_vd(vdouble vd) { return (vmask)vd; }
static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return (vdouble)vm; }
static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return (vint2)vd; }
static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return (vdouble)vi; }

static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return (vmask)vf; }
static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return (vfloat)vm; }
static INLINE vfloat vreinterpret_vf_vi2(vint2 vi) { return (vfloat)vi; }
static INLINE vint2
vreinterpret_vi2_vf(vfloat vf) { return (vint2)vf; }

// Double-precision arithmetic. The reciprocal is a full division 1.0 / x.
static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return vec_add(x, y); }
static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return vec_sub(x, y); }
static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return vec_mul(x, y); }
static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return vec_div(x, y); }
static INLINE vdouble vrec_vd_vd(vdouble x) { return vec_div(vcast_vd_d(1.0), x); }
static INLINE vdouble vneg_vd_vd(vdouble d) { return vec_neg(d); }

// Single-precision arithmetic. The reciprocal is a full division 1.0f / x.
static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return vec_add(x, y); }
static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return vec_sub(x, y); }
static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return vec_mul(x, y); }
static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return vec_div(x, y); }
static INLINE vfloat vrec_vf_vf(vfloat x) { return vec_div(vcast_vf_f(1.0f), x); }
static INLINE vfloat vneg_vf_vf(vfloat d) { return vec_neg(d); }

// Bitwise logic. "andnot" computes y & ~x; vec_andc(a, b) is a & ~b, hence
// the swapped operand order.
static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return vec_and(x, y); }
static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return vec_andc(y, x); }
static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return vec_or(x, y); }
static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return vec_xor(x, y); }

static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return vec_and(x, y); }
static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return vec_andc(y, x); }
static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return vec_or(x, y); }
static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return vec_xor(x, y); }

// Mask-with-value forms for 64-bit lane masks.
static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return vec_and((vmask)x, y); }
static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return vec_andc(y, x); }
static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return vec_or((vmask)x, y); }
static INLINE vmask vxor_vm_vo64_vm(vopmask x, vmask y) { return
vec_xor((vmask)x, y); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return vec_and((vmask)x, y); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return vec_andc(y, x); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return vec_or((vmask)x, y); } static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return vec_xor((vmask)x, y); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return vec_sel(y, x, (vector unsigned long long)o); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask o, vfloat x, vfloat y) { return vec_sel(y, x, o); } static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask o, vint2 x, vint2 y) { return vec_sel(y, x, o); } static INLINE int vtestallones_i_vo64(vopmask g) { return vec_all_ne(vec_and(g, (vector unsigned int)(0, 0, 0xffffffff, 0xffffffff)), (vector unsigned int)(0, 0, 0, 0)); } static INLINE int vtestallones_i_vo32(vopmask g) { return vec_all_ne(g, (vector unsigned int)(0, 0, 0, 0)); } static INLINE vopmask vcast_vo32_vo64(vopmask m) { return vec_perm(m, m, (vector unsigned char)(4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 )); } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return vec_perm(m, m, (vector unsigned char)(0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7)); } static INLINE vmask vcast_vm_i_i(int h, int l) { return (vmask){ l, h, l, h }; } static INLINE vint2 vcastu_vi2_vi(vint vi) { return (vint2){ 0, vi[0], 0, vi[1] }; } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return (vint){ vi2[1], vi2[3] }; } static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1] }; } static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], 0, 0 }; } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return vec_perm(vd, vd, (vector unsigned char)(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } static INLINE vfloat vrev21_vf_vf(vfloat vf) { 
return vec_perm(vf, vf, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)); } static INLINE vfloat vreva2_vf_vf(vfloat vf) { return vec_perm(vf, vf, (vector unsigned char)(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { vopmask o = vec_cmpeq(x, y); return o & vec_perm(o, o, (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return (vmask)vec_add((vector long long)x, (vector long long)y); } // #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } // static INLINE vdouble vabs_vd_vd(vdouble d) { return vec_abs(d); } static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_msub(x, y, z); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return vec_max(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return vec_min(x, y); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return 
vmla_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vdouble vsqrt_vd_vd(vdouble d) { return vec_sqrt(d); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); } static INLINE vdouble vfmapp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_madd(x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_msub(x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_nmsub(x, y, z); } static INLINE vdouble vfmann_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vec_nmadd(x, y, z); } static INLINE vfloat vabs_vf_vf(vfloat f) { return vec_abs(f); } static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_madd(x, y, z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmsub(x, y, z); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return vec_max(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vec_min(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vmla_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } static INLINE vfloat vsqrt_vf_vf(vfloat d) { return vec_sqrt(d); } static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_madd(x, y, z); } static INLINE vfloat vfmapp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_madd(x, y, z); } static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_msub(x, y, z); } static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmsub(x, y, z); } static INLINE vfloat vfmann_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vec_nmadd(x, y, z); } // static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } 
static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } // static INLINE vopmask vnot_vo_vo(vopmask o) { return vec_nand(o, o); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpeq(x, y); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vnot_vo_vo(vec_cmpeq(x, y)); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmplt(x, y); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmple(x, y); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpgt(x, y); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)vec_cmpge(x, y); } static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return vec_add(x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return vec_sub(x, y); } static INLINE vint vneg_vi_vi(vint e) { return vec_neg(e); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return vec_and(x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return vec_andc(y, x); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return vec_or(x, y); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return vec_xor(x, y); } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return vreinterpretFirstHalf_vi_vi2((vint2)x) & y; } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return vec_andc(y, vreinterpretFirstHalf_vi_vi2((vint2)x)); } static INLINE vint vsll_vi_vi_i(vint x, int c) { return vec_sl (x, (vector unsigned int)(c, c, c, c)); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { return vec_sr (x, (vector unsigned 
int)(c, c, c, c)); } static INLINE vint vsra_vi_vi_i(vint x, int c) { return vec_sra(x, (vector unsigned int)(c, c, c, c)); } static INLINE vint veq_vi_vi_vi(vint x, vint y) { return vec_cmpeq(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return vec_cmpgt(x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return (vopmask)vreinterpretFirstHalf_vi2_vi(vec_cmpeq(x, y)); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return (vopmask)vreinterpretFirstHalf_vi2_vi(vec_cmpgt(x, y));} static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return vor_vi_vi_vi(vand_vi_vi_vi(vreinterpretFirstHalf_vi_vi2((vint2)m), x), vandnot_vi_vi_vi(vreinterpretFirstHalf_vi_vi2((vint2)m), y)); } static INLINE vopmask visinf_vo_vd(vdouble d) { return (vopmask)(vec_cmpeq(vabs_vd_vd(d), vcast_vd_d(SLEEF_INFINITY))); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return (vopmask)(vec_cmpeq(d, vcast_vd_d(SLEEF_INFINITY))); } static INLINE vopmask visminf_vo_vd(vdouble d) { return (vopmask)(vec_cmpeq(d, vcast_vd_d(-SLEEF_INFINITY))); } static INLINE vopmask visnan_vo_vd(vdouble d) { return (vopmask)(vnot_vo_vo(vec_cmpeq(d, d))); } static INLINE double vcast_d_vd(vdouble v) { return v[0]; } static INLINE float vcast_f_vf(vfloat v) { return v[0]; } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { vstore_v_p_vd(ptr, v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vscatter2_v_p_i_i_vd(ptr, offset, step, v); } // static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), 
vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vint2 vcast_vi2_vm(vmask vm) { return (vint2)vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return (vmask)vi; } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmpeq(x, y); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vnot_vo_vo(vec_cmpeq(x, y)); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmplt(x, y); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmple(x, y); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmpgt(x, y); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)vec_cmpge(x, y); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_add(x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_sub(x, y); } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vec_neg(e); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_and(x, y); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_andc(y, x); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_or(x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_xor(x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)vec_and((vint2)x, y); } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vec_andc(y, (vint2)x); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return vec_sl (x, (vector unsigned int)(c, c, c, c)); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return vec_sr (x, (vector unsigned int)(c, c, c, c)); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return vec_sra(x, (vector unsigned int)(c, c, c, c)); } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return (vopmask)vec_cmpeq(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return 
(vopmask)vec_cmpgt(x, y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_cmpeq(x, y); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return vec_cmpgt(x, y); } static INLINE vopmask visinf_vo_vf(vfloat d) { return (vopmask)vec_cmpeq(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return (vopmask)vec_cmpeq(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return (vopmask)vec_cmpeq(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return (vopmask)vnot_vo_vo(vec_cmpeq(d, d)); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } sleef-3.3.1/src/arch/helperpurec.h000066400000000000000000000744301333715643700170330ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include "misc.h" #ifndef CONFIG #error CONFIG macro not defined #endif #define ENABLE_DP #define ENABLE_SP #define LOG2VECTLENDP CONFIG #define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) #define ACCURATE_SQRT #define DFTPRIORITY LOG2VECTLENDP #define ISANAME "Pure C Array" typedef union { uint32_t u[VECTLENDP*2]; uint64_t x[VECTLENDP]; double d[VECTLENDP]; float f[VECTLENDP*2]; int32_t i[VECTLENDP*2]; } versatileVector; typedef versatileVector vmask; typedef versatileVector vopmask; typedef versatileVector vdouble; typedef versatileVector vint; typedef versatileVector vfloat; typedef versatileVector vint2; typedef union { uint8_t u[sizeof(long double)*VECTLENDP]; long double ld[VECTLENDP]; } longdoubleVector; typedef longdoubleVector vmaskl; typedef longdoubleVector vlongdouble; #ifdef Sleef_quad2_DEFINED typedef union { uint8_t u[sizeof(Sleef_quad)*VECTLENDP]; Sleef_quad q[VECTLENDP]; } quadVector; typedef quadVector vmaskq; typedef quadVector vquad; #endif // static INLINE int vavailability_i(int name) { return -1; } static INLINE void vprefetch_v_p(const void *ptr) { } static INLINE int vtestallones_i_vo64(vopmask g) { int ret = 1; for(int i=0;i 0 ? (int)(vd.d[i] + 0.5) : (int)(vd.d[i] - 0.5); return ret; } static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } static INLINE vint vcast_vi_i(int j) { vint ret; for(int i=0;i y.d[i] ? x.d[i] : y.d[i]; return ret; } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { vdouble ret; for(int i=0;i y.d[i] ? -1 : 0; return ret; } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { vopmask ret; for(int i=0;i= y.d[i] ? 
-1 : 0; return ret; } static INLINE vint vadd_vi_vi_vi(vint x, vint y) { vint ret; for(int i=0;i> c; return ret; } static INLINE vint vsra_vi_vi_i(vint x, int c) { vint ret; for(int i=0;i> c; return ret; } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { vopmask ret; for(int i=0;i y.i[i] ? -1 : 0; return ret; } static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { union { vopmask vo; vint2 vi2; } cnv; cnv.vo = m; return vor_vi_vi_vi(vand_vi_vi_vi(vreinterpretFirstHalf_vi_vi2(cnv.vi2), x), vandnot_vi_vi_vi(vreinterpretFirstHalf_vi_vi2(cnv.vi2), y)); } static INLINE vopmask visinf_vo_vd(vdouble d) { vopmask ret; for(int i=0;i 0 ? (int)(vf.f[i] + 0.5) : (int)(vf.f[i] - 0.5); return ret; } static INLINE vint2 vcast_vi2_i(int j) { vint2 ret; for(int i=0;i y.f[i] ? x.f[i] : y.f[i]; return ret; } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { vfloat ret; for(int i=0;i y.f[i]) ? -1 : 0); return ret; } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { vopmask ret; for(int i=0;i= y.f[i]) ? -1 : 0); return ret; } static INLINE vint vadd_vi2_vi2_vi2(vint x, vint y) { vint ret; for(int i=0;i> c; return ret; } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { vint2 ret; for(int i=0;i> c; return ret; } static INLINE vopmask visinf_vo_vf (vfloat d) { vopmask ret; for(int i=0;i y.i[i] ? -1 : 0; return ret; } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { vopmask ret; for(int i=0;i y.i[i] ? 
-1 : 0; return ret; } static INLINE vfloat vsqrt_vf_vf(vfloat x) { vfloat ret; for(int i=0;i #else #include #endif #include #include "misc.h" typedef __m128i vmask; typedef __m128i vopmask; typedef __m128d vdouble; typedef __m128i vint; typedef __m128 vfloat; typedef __m128i vint2; // #ifndef __SLEEF_H__ void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif static int cpuSupportsSSE2() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[3] & (1 << 26)) != 0; } static int cpuSupportsSSE3() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 0)) != 0; } static int cpuSupportsSSE4_1() { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); return (reg[2] & (1 << 19)) != 0; } #if defined(__SSE2__) && defined(__SSE3__) && defined(__SSE4_1__) static INLINE int vavailability_i(int name) { //int d = __builtin_cpu_supports("sse2") && __builtin_cpu_supports("sse3") && __builtin_cpu_supports("sse4.1"); int d = cpuSupportsSSE2() && cpuSupportsSSE3() && cpuSupportsSSE4_1(); return d ? 3 : 0; } #define ISANAME "SSE4.1" #define DFTPRIORITY 12 #elif defined(__SSE2__) && defined(__SSE3__) static INLINE int vavailability_i(int name) { //int d = __builtin_cpu_supports("sse2") && __builtin_cpu_supports("sse3"); int d = cpuSupportsSSE2() && cpuSupportsSSE3(); return d ? 3 : 0; } #define ISANAME "SSE3" #define DFTPRIORITY 11 #else static INLINE int vavailability_i(int name) { int d = cpuSupportsSSE2(); return d ? 
3 : 0; } #define ISANAME "SSE2" #define DFTPRIORITY 10 #endif static INLINE void vprefetch_v_p(const void *ptr) { _mm_prefetch(ptr, _MM_HINT_T0); } static INLINE int vtestallones_i_vo32(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } static INLINE int vtestallones_i_vo64(vopmask g) { return _mm_movemask_epi8(g) == 0xFFFF; } // static vint2 vloadu_vi2_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } static void vstoreu_v_p_vi2(int32_t *p, vint2 v) { _mm_storeu_si128((__m128i *)p, v); } static vint vloadu_vi_p(int32_t *p) { return _mm_loadu_si128((__m128i *)p); } static void vstoreu_v_p_vi(int32_t *p, vint v) { _mm_storeu_si128((__m128i *)p, v); } // static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return _mm_and_si128(x, y); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return _mm_or_si128(x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); } static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return _mm_and_si128(x, y); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return _mm_andnot_si128(x, y); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return _mm_or_si128(x, y); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return _mm_xor_si128(x, y); } static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) { return _mm_and_si128(x, y); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return _mm_or_si128(x, y); } static INLINE vmask vandnot_vm_vo64_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); } static INLINE vmask vxor_vm_vo64_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); } static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return _mm_and_si128(x, y); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return _mm_or_si128(x, y); } static INLINE vmask vandnot_vm_vo32_vm(vmask x, vmask y) { return _mm_andnot_si128(x, y); } static 
INLINE vmask vxor_vm_vo32_vm(vmask x, vmask y) { return _mm_xor_si128(x, y); } static INLINE vopmask vcast_vo32_vo64(vopmask m) { return _mm_shuffle_epi32(m, 0x08); } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return _mm_shuffle_epi32(m, 0x50); } // static INLINE vint vrint_vi_vd(vdouble vd) { return _mm_cvtpd_epi32(vd); } static INLINE vint vtruncate_vi_vd(vdouble vd) { return _mm_cvttpd_epi32(vd); } static INLINE vdouble vcast_vd_vi(vint vi) { return _mm_cvtepi32_pd(vi); } static INLINE vint vcast_vi_i(int i) { return _mm_set_epi32(0, 0, i, i); } static INLINE vint2 vcastu_vi2_vi(vint vi) { return _mm_and_si128(_mm_shuffle_epi32(vi, 0x73), _mm_set_epi32(-1, 0, -1, 0)); } static INLINE vint vcastu_vi_vi2(vint2 vi) { return _mm_shuffle_epi32(vi, 0x0d); } #ifdef __SSE4_1__ static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return _mm_round_pd(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vfloat vtruncate_vf_vf(vfloat vf) { return _mm_round_ps(vf, _MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC); } static INLINE vfloat vrint_vf_vf(vfloat vd) { return _mm_round_ps(vd, _MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return _mm_cmpeq_epi64(x, y); } #define FULL_FP_ROUNDING #else static INLINE vdouble vtruncate_vd_vd(vdouble vd) { return vcast_vd_vi(vtruncate_vi_vd(vd)); } static INLINE vdouble vrint_vd_vd(vdouble vd) { return vcast_vd_vi(vrint_vi_vd(vd)); } static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { vmask t = _mm_cmpeq_epi32(x, y); return vand_vm_vm_vm(t, _mm_shuffle_epi32(t, 0xb1)); } #endif static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return _mm_add_epi64(x, y); } static INLINE vmask vcast_vm_i_i(int i0, int i1) { return _mm_set_epi32(i0, i1, i0, i1); } // static INLINE vdouble vcast_vd_d(double d) { return _mm_set1_pd(d); } static INLINE vmask 
vreinterpret_vm_vd(vdouble vd) { return _mm_castpd_si128(vd); } static INLINE vint2 vreinterpret_vi2_vd(vdouble vd) { return _mm_castpd_si128(vd); } static INLINE vdouble vreinterpret_vd_vi2(vint2 vi) { return _mm_castsi128_pd(vi); } static INLINE vdouble vreinterpret_vd_vm(vmask vm) { return _mm_castsi128_pd(vm); } static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_add_pd(x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return _mm_sub_pd(x, y); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return _mm_mul_pd(x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return _mm_div_pd(x, y); } static INLINE vdouble vrec_vd_vd(vdouble x) { return _mm_div_pd(_mm_set1_pd(1), x); } static INLINE vdouble vsqrt_vd_vd(vdouble x) { return _mm_sqrt_pd(x); } static INLINE vdouble vabs_vd_vd(vdouble d) { return _mm_andnot_pd(_mm_set1_pd(-0.0), d); } static INLINE vdouble vneg_vd_vd(vdouble d) { return _mm_xor_pd(_mm_set1_pd(-0.0), d); } static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return _mm_max_pd(x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return _mm_min_pd(x, y); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpeq_pd(x, y)); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpneq_pd(x, y)); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmplt_pd(x, y)); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmple_pd(x, y)); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return _mm_castpd_si128(_mm_cmpgt_pd(x, y)); } static INLINE vopmask vge_vo_vd_vd(vdouble 
x, vdouble y) { return _mm_castpd_si128(_mm_cmpge_pd(x, y)); } static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return _mm_add_epi32(x, y); } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return _mm_sub_epi32(x, y); } static INLINE vint vneg_vi_vi(vint e) { return vsub_vi_vi_vi(vcast_vi_i(0), e); } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return _mm_and_si128(x, y); } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return _mm_andnot_si128(x, y); } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return _mm_or_si128(x, y); } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return _mm_xor_si128(x, y); } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return _mm_and_si128(x, y); } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return _mm_andnot_si128(x, y); } static INLINE vint vsll_vi_vi_i(vint x, int c) { return _mm_slli_epi32(x, c); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { return _mm_srli_epi32(x, c); } static INLINE vint vsra_vi_vi_i(vint x, int c) { return _mm_srai_epi32(x, c); } static INLINE vint veq_vi_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } static INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm_cmpgt_epi32(x, y); } #ifdef __SSE4_1__ static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, m); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask m, vdouble x, vdouble y) { return _mm_blendv_pd(y, x, _mm_castsi128_pd(m)); } #else static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return vor_vm_vm_vm(vand_vm_vm_vm(m, x), vandnot_vm_vm_vm(m, y)); } static INLINE vdouble vsel_vd_vo_vd_vd(vopmask opmask, vdouble x, vdouble y) { return vreinterpret_vd_vm(vor_vm_vm_vm(vand_vm_vm_vm(opmask, vreinterpret_vm_vd(x)), vandnot_vm_vm_vm(opmask, vreinterpret_vm_vd(y)))); } #endif static 
INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0)); } static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2)); } static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3))); } static INLINE vopmask visinf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmpeq_pd(vabs_vd_vd(d), _mm_set1_pd(SLEEF_INFINITY))); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmpeq_pd(d, _mm_set1_pd(SLEEF_INFINITY))); } static INLINE vopmask visminf_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmpeq_pd(d, _mm_set1_pd(-SLEEF_INFINITY))); } static INLINE vopmask visnan_vo_vd(vdouble d) { return vreinterpret_vm_vd(_mm_cmpneq_pd(d, d)); } // static INLINE vdouble vload_vd_p(const double *ptr) { return _mm_load_pd(ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm_loadu_pd(ptr); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm_store_pd(ptr, v); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm_storeu_pd(ptr, v); } static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { int a[sizeof(vint)/sizeof(int)]; vstoreu_v_p_vi(a, vi); return _mm_set_pd(ptr[a[1]], ptr[a[0]]); } #if defined(_MSC_VER) // This function is needed when debugging on MSVC. 
static INLINE double vcast_d_vd(vdouble v) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); return a[0]; } #endif // static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } static INLINE vint2 vrint_vi2_vf(vfloat vf) { return _mm_cvtps_epi32(vf); } static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return _mm_cvttps_epi32(vf); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return _mm_cvtepi32_ps(vcast_vm_vi2(vi)); } static INLINE vfloat vcast_vf_f(float f) { return _mm_set1_ps(f); } static INLINE vint2 vcast_vi2_i(int i) { return _mm_set1_epi32(i); } static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return _mm_castps_si128(vf); } static INLINE vfloat vreinterpret_vf_vm(vmask vm) { return _mm_castsi128_ps(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return _mm_castsi128_ps(vm); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return _mm_castps_si128(vf); } #ifndef __SSE4_1__ static INLINE vfloat vtruncate_vf_vf(vfloat vd) { return vcast_vf_vi2(vtruncate_vi2_vf(vd)); } static INLINE vfloat vrint_vf_vf(vfloat vf) { return vcast_vf_vi2(vrint_vi2_vf(vf)); } #endif static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_add_ps(x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return _mm_sub_ps(x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return _mm_mul_ps(x, y); } static INLINE vfloat vdiv_vf_vf_vf(vfloat x, vfloat y) { return _mm_div_ps(x, y); } static INLINE vfloat vrec_vf_vf(vfloat x) { return vdiv_vf_vf_vf(vcast_vf_f(1.0f), x); } static INLINE vfloat vsqrt_vf_vf(vfloat x) { return _mm_sqrt_ps(x); } static INLINE vfloat vabs_vf_vf(vfloat f) { return vreinterpret_vf_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f))); } static INLINE vfloat vneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(d))); } static INLINE vfloat vmla_vf_vf_vf_vf(vfloat 
x, vfloat y, vfloat z) { return vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsub_vf_vf_vf(z, vmul_vf_vf_vf(x, y)); } static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return _mm_max_ps(x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return _mm_min_ps(x, y); } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpeq_ps(x, y)); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpneq_ps(x, y)); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmplt_ps(x, y)); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmple_ps(x, y)); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpgt_ps(x, y)); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return vreinterpret_vm_vf(_mm_cmpge_ps(x, y)); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return vadd_vi_vi_vi(x, y); } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return vsub_vi_vi_vi(x, y); } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return vsub_vi2_vi2_vi2(vcast_vi2_i(0), e); } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return vand_vi_vi_vi(x, y); } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return vandnot_vi_vi_vi(x, y); } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return vor_vi_vi_vi(x, y); } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return vxor_vi_vi_vi(x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return vand_vi_vo_vi(x, y); } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return vandnot_vi_vo_vi(x, y); } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { return vsll_vi_vi_i(x, c); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { return vsrl_vi_vi_i(x, c); } static INLINE vint2 
vsra_vi2_vi2_i(vint2 x, int c) { return vsra_vi_vi_i(x, c); } static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpeq_epi32(x, y); } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return _mm_cmpgt_epi32(x, y); } #ifdef __SSE4_1__ static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return _mm_blendv_epi8(y, x, m); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask m, vfloat x, vfloat y) { return _mm_blendv_ps(y, x, _mm_castsi128_ps(m)); } #else static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) { return vor_vi2_vi2_vi2(vand_vi2_vi2_vi2(m, x), vandnot_vi2_vi2_vi2(m, y)); } static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) { return vreinterpret_vf_vm(vor_vm_vm_vm(vand_vm_vm_vm(mask, vreinterpret_vm_vf(x)), vandnot_vm_vm_vm(mask, vreinterpret_vm_vf(y)))); } #endif static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) { return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0)); } static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2)); } static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) { return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3))); } static INLINE vopmask visinf_vo_vf(vfloat d) { return veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(SLEEF_INFINITYf)); } static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-SLEEF_INFINITYf)); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } static INLINE vfloat 
vload_vf_p(const float *ptr) { return _mm_load_ps(ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm_loadu_ps(ptr); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm_store_ps(ptr, v); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm_storeu_ps(ptr, v); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi) { int a[VECTLENSP]; vstoreu_v_p_vi2(a, vi); return _mm_set_ps(ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); } #ifdef _MSC_VER // This function is useful when debugging on MSVC. static INLINE float vcast_f_vf(vfloat v) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); return a[0]; } #endif // #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(PNMASK))); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(NPMASK))); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(PNMASKf))); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(d), vreinterpret_vm_vf(NPMASKf))); } #ifdef __SSE3__ static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return _mm_addsub_pd(x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return _mm_addsub_ps(x, y); } #else static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); } #endif static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE 
vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vdouble vrev21_vd_vd(vdouble d0) { return _mm_shuffle_pd(d0, d0, 1); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { _mm_stream_pd(ptr, v); } static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { _mm_stream_pd((double *)(&ptr[2*offset]), v); } // static INLINE vfloat vrev21_vf_vf(vfloat d0) { return _mm_shuffle_ps(d0, d0, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); } static INLINE vfloat vreva2_vf_vf(vfloat d0) { return _mm_shuffle_ps(d0, d0, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { _mm_stream_ps(ptr, v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { _mm_storel_pd((double *)(ptr+(offset + step * 0)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); _mm_storeh_pd((double *)(ptr+(offset + step * 1)*2), vreinterpret_vd_vm(vreinterpret_vm_vf(v))); } sleef-3.3.1/src/arch/helpersve.h000066400000000000000000000550211333715643700165050ustar00rootroot00000000000000/*********************************************************************/ /* Copyright ARM Ltd. 2010 - 2017. */ /* Distributed under the Boost Software License, Version 1.0. 
*/ /* (See accompanying file LICENSE.txt or copy at */ /* http://www.boost.org/LICENSE_1_0.txt) */ /*********************************************************************/ #ifndef __ARM_FEATURE_SVE #error Please specify SVE flags. #endif #include #include #include "misc.h" #if defined(VECTLENDP) || defined(VECTLENSP) #error VECTLENDP or VECTLENSP already defined #endif #if CONFIG == 1 // Vector length agnostic #define VECTLENSP (svcntw()) #define VECTLENDP (svcntd()) #define ISANAME "AArch64 SVE" #define ptrue svptrue_b8() #elif CONFIG == 8 // 256-bit vector length #define ISANAME "AArch64 SVE 256-bit" #define LOG2VECTLENDP 2 #define ptrue svptrue_pat_b8(SV_VL32) #define DFTPRIORITY 20 #elif CONFIG == 9 // 512-bit vector length #define ISANAME "AArch64 SVE 512-bit" #define LOG2VECTLENDP 3 #define ptrue svptrue_pat_b8(SV_VL64) #define DFTPRIORITY 21 #elif CONFIG == 10 // 1024-bit vector length #define ISANAME "AArch64 SVE 1024-bit" #define LOG2VECTLENDP 4 #define ptrue svptrue_pat_b8(SV_VL128) #define DFTPRIORITY 22 #elif CONFIG == 11 // 2048-bit vector length #define ISANAME "AArch64 SVE 2048-bit" #define LOG2VECTLENDP 5 #define ptrue svptrue_pat_b8(SV_VL256) #define DFTPRIORITY 23 #else #error CONFIG macro invalid or not defined #endif #ifdef LOG2VECTLENDP // For DFT, VECTLENDP and VECTLENSP are not the size of the available // vector length, but the size of the partial vectors utilized in the // computation. The appropriate VECTLENDP and VECTLENSP are chosen by // the dispatcher according to the value of svcntd(). #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENDP (1 << LOG2VECTLENDP) #define VECTLENSP (1 << LOG2VECTLENSP) static INLINE int vavailability_i(int name) { return svcntd() >= VECTLENDP ? 
3 : 0; } #else static INLINE int vavailability_i(int name) { return 3; } #endif #define ENABLE_SP #define ENABLE_FMA_SP #define ENABLE_DP #define ENABLE_FMA_DP #define FULL_FP_ROUNDING #define ACCURATE_SQRT // Mask definition typedef svint32_t vmask; typedef svbool_t vopmask; // Single precision definitions typedef svfloat32_t vfloat; typedef svint32_t vint2; // Double precision definitions typedef svfloat64_t vdouble; typedef svint32_t vint; // masking predicates #define ALL_TRUE_MASK svdup_n_s32(0xffffffff) #define ALL_FALSE_MASK svdup_n_s32(0x0) static INLINE void vprefetch_v_p(const void *ptr) {} // // // // Test if all lanes are active // // // static INLINE int vtestallones_i_vo32(vopmask g) { svbool_t pg = svptrue_b32(); return (svcntp_b32(pg, g) == svcntw()); } static INLINE int vtestallones_i_vo64(vopmask g) { svbool_t pg = svptrue_b64(); return (svcntp_b64(pg, g) == svcntd()); } // // // // // // // Vector load / store static INLINE void vstoreu_v_p_vi2(int32_t *p, vint2 v) { svst1_s32(ptrue, p, v); } static INLINE vfloat vload_vf_p(const float *ptr) { return svld1_f32(ptrue, ptr); } static INLINE vfloat vloadu_vf_p(const float *ptr) { return svld1_f32(ptrue, ptr); } static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { svst1_f32(ptrue, ptr, v); } // Basic logical operations for mask static INLINE vmask vand_vm_vm_vm(vmask x, vmask y) { return svand_s32_x(ptrue, x, y); } static INLINE vmask vandnot_vm_vm_vm(vmask x, vmask y) { return svbic_s32_x(ptrue, y, x); } static INLINE vmask vor_vm_vm_vm(vmask x, vmask y) { return svorr_s32_x(ptrue, x, y); } static INLINE vmask vxor_vm_vm_vm(vmask x, vmask y) { return sveor_s32_x(ptrue, x, y); } static INLINE vmask vadd64_vm_vm_vm(vmask x, vmask y) { return svreinterpret_s32_s64( svadd_s64_x(ptrue, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y))); } // Mask <--> single precision reinterpret static INLINE vmask vreinterpret_vm_vf(vfloat vf) { return svreinterpret_s32_f32(vf); } static INLINE vfloat 
vreinterpret_vf_vm(vmask vm) { return svreinterpret_f32_s32(vm); } static INLINE vfloat vreinterpret_vf_vi2(vint2 vm) { return svreinterpret_f32_s32(vm); } static INLINE vint2 vreinterpret_vi2_vf(vfloat vf) { return svreinterpret_s32_f32(vf); } static INLINE vint2 vcast_vi2_vm(vmask vm) { return vm; } static INLINE vmask vcast_vm_vi2(vint2 vi) { return vi; } // Conditional select static INLINE vint2 vsel_vi2_vm_vi2_vi2(vmask m, vint2 x, vint2 y) { return svsel_s32(svcmpeq_s32(ptrue, m, ALL_TRUE_MASK), x, y); } /****************************************/ /* Single precision FP operations */ /****************************************/ // Broadcast static INLINE vfloat vcast_vf_f(float f) { return svdup_n_f32(f); } // Add, Sub, Mul, Reciprocal 1/x, Division, Square root static INLINE vfloat vadd_vf_vf_vf(vfloat x, vfloat y) { return svadd_f32_x(ptrue, x, y); } static INLINE vfloat vsub_vf_vf_vf(vfloat x, vfloat y) { return svsub_f32_x(ptrue, x, y); } static INLINE vfloat vmul_vf_vf_vf(vfloat x, vfloat y) { return svmul_f32_x(ptrue, x, y); } static INLINE vfloat vrec_vf_vf(vfloat d) { return svdivr_n_f32_x(ptrue, d, 1.0f); } static INLINE vfloat vdiv_vf_vf_vf(vfloat n, vfloat d) { return svdiv_f32_x(ptrue, n, d); } static INLINE vfloat vsqrt_vf_vf(vfloat d) { return svsqrt_f32_x(ptrue, d); } // |x|, -x static INLINE vfloat vabs_vf_vf(vfloat f) { return svabs_f32_x(ptrue, f); } static INLINE vfloat vneg_vf_vf(vfloat f) { return svneg_f32_x(ptrue, f); } // max, min static INLINE vfloat vmax_vf_vf_vf(vfloat x, vfloat y) { return svmax_f32_x(ptrue, x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return svmin_f32_x(ptrue, x, y); } // int <--> float conversions static INLINE vint2 vtruncate_vi2_vf(vfloat vf) { return svcvt_s32_f32_x(ptrue, vf); } static INLINE vfloat vcast_vf_vi2(vint2 vi) { return svcvt_f32_s32_x(ptrue, vi); } static INLINE vint2 vcast_vi2_i(int i) { return svdup_n_s32(i); } static INLINE vint2 vrint_vi2_vf(vfloat d) { return 
svcvt_s32_f32_x(ptrue, svrintn_f32_x(ptrue, d)); }
// NOTE: the statement above is the tail of vrint_vi2_vf, whose header sits on
// the preceding line. It rounds with svrintn (to nearest, ties to even) rather
// than svrinta (to nearest, ties away from zero) so that rint follows the
// default IEEE 754 rounding rule.

// Multiply accumulate: z = z + x * y
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  return svmad_f32_x(ptrue, x, y, z);
}

// Multiply subtract: z = z - x * y
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  return svmsb_f32_x(ptrue, x, y, z);
}

// fused multiply add / sub
static INLINE vfloat vfma_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  // z + x * y
  return svmad_f32_x(ptrue, x, y, z);
}

static INLINE vfloat vfmanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  // z - x * y
  return svmsb_f32_x(ptrue, x, y, z);
}

static INLINE vfloat vfmapn_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
  // x * y - z
  return svnmsb_f32_x(ptrue, x, y, z);
}

// conditional select: lanes where mask is true take x, the others take y
static INLINE vfloat vsel_vf_vo_vf_vf(vopmask mask, vfloat x, vfloat y) {
  return svsel_f32(mask, x, y);
}

// Select between broadcast scalar constants: v1 where o is true, v0 elsewhere.
static INLINE CONST vfloat vsel_vf_vo_f_f(vopmask o, float v1, float v0) {
  return vsel_vf_vo_vf_vf(o, vcast_vf_f(v1), vcast_vf_f(v0));
}

// Two-level select: d0 where o0, else d1 where o1, else d2.
static INLINE vfloat vsel_vf_vo_vo_f_f_f(vopmask o0, vopmask o1, float d0, float d1, float d2) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_f_f(o1, d1, d2));
}

// Three-level select: d0 where o0, else d1 where o1, else d2 where o2, else d3.
static INLINE vfloat vsel_vf_vo_vo_vo_f_f_f_f(vopmask o0, vopmask o1, vopmask o2, float d0, float d1, float d2, float d3) {
  return vsel_vf_vo_vf_vf(o0, vcast_vf_f(d0), vsel_vf_vo_vf_vf(o1, vcast_vf_f(d1), vsel_vf_vo_f_f(o2, d2, d3)));
}

// truncate
static INLINE vfloat vtruncate_vf_vf(vfloat vd) {
  return svrintz_f32_x(ptrue, vd);
}

// Round float
// Rounds to the nearest integral value with ties going to even (svrintn);
// svrinta would send ties away from zero instead.
static INLINE vfloat vrint_vf_vf(vfloat vf) {
  return svrintn_f32_x(svptrue_b32(), vf);
}

/***************************************/
/* Single precision integer operations */
/***************************************/

// Add, Sub, Neg (-x)
static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) {
  return svadd_s32_x(ptrue, x, y);
}
static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) {
  return
svsub_s32_x(ptrue, x, y); }
static INLINE vint2 vneg_vi2_vi2(vint2 e) {
  return svneg_s32_x(ptrue, e);
}

// Logical operations
static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) {
  return svand_s32_x(ptrue, x, y);
}
// vandnot(x, y) is (~x) & y; svbic(a, b) computes a & ~b, hence the swapped
// operand order.
static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) {
  return svbic_s32_x(ptrue, y, x);
}
static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) {
  return svorr_s32_x(ptrue, x, y);
}
static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) {
  return sveor_s32_x(ptrue, x, y);
}

// Shifts
#define vsll_vi2_vi2_i(x, c) svlsl_n_s32_x(ptrue, x, c)
#define vsrl_vi2_vi2_i(x, c) \
  svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c))
#define vsra_vi2_vi2_i(x, c) svasr_n_s32_x(ptrue, x, c)

// Comparison returning integers
// Lane-wise signed x > y: lanes where it holds become ALL_TRUE_MASK, the
// others ALL_FALSE_MASK. The compare must be the strict svcmpgt; a
// greater-or-equal compare would also flag lanes where x == y.
static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) {
  return svsel_s32(svcmpgt_s32(ptrue, x, y), ALL_TRUE_MASK, ALL_FALSE_MASK);
}

// conditional select: lanes where m is true take x, the others take y
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask m, vint2 x, vint2 y) {
  return svsel_s32(m, x, y);
}

/****************************************/
/* opmask operations                    */
/****************************************/

// single precision FP
static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) {
  return svcmpeq_f32(ptrue, x, y);
}
static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) {
  return svcmpne_f32(ptrue, x, y);
}
static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) {
  return svcmplt_f32(ptrue, x, y);
}
static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) {
  return svcmple_f32(ptrue, x, y);
}
static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) {
  return svcmpgt_f32(ptrue, x, y);
}
static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) {
  return svcmpge_f32(ptrue, x, y);
}
// True in lanes where |d| equals +infinity.
static INLINE vopmask visinf_vo_vf(vfloat d) {
  return svcmpeq_n_f32(ptrue, vabs_vf_vf(d), SLEEF_INFINITYf);
}
// True in lanes where d equals +infinity.
static INLINE vopmask vispinf_vo_vf(vfloat d) {
  return svcmpeq_n_f32(ptrue, d, SLEEF_INFINITYf);
}
static INLINE vopmask visminf_vo_vf(vfloat d) {
  return svcmpeq_n_f32(ptrue, d,
-SLEEF_INFINITYf); } static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); } // integers static INLINE vopmask veq_vo_vi2_vi2(vint2 x, vint2 y) { return svcmpeq_s32(ptrue, x, y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return svcmpgt_s32(ptrue, x, y); } // logical opmask static INLINE vopmask vand_vo_vo_vo(vopmask x, vopmask y) { return svand_b_z(ptrue, x, y); } static INLINE vopmask vandnot_vo_vo_vo(vopmask x, vopmask y) { return svbic_b_z(ptrue, y, x); } static INLINE vopmask vor_vo_vo_vo(vopmask x, vopmask y) { return svorr_b_z(ptrue, x, y); } static INLINE vopmask vxor_vo_vo_vo(vopmask x, vopmask y) { return sveor_b_z(ptrue, x, y); } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { // This needs to be zeroing to prevent asinf and atanf denormal test // failing. return svand_s32_z(x, y, y); } // bitmask logical operations static INLINE vmask vand_vm_vo32_vm(vopmask x, vmask y) { return svsel_s32(x, y, ALL_FALSE_MASK); } static INLINE vmask vandnot_vm_vo32_vm(vopmask x, vmask y) { return svsel_s32(x, ALL_FALSE_MASK, y); } static INLINE vmask vor_vm_vo32_vm(vopmask x, vmask y) { return svsel_s32(x, ALL_TRUE_MASK, y); } // broadcast bitmask static INLINE vmask vcast_vm_i_i(int i0, int i1) { return svreinterpret_s32_u64( svdup_n_u64((0xffffffff & (uint64_t)i1) | (((uint64_t)i0) << 32))); } /*********************************/ /* SVE for double precision math */ /*********************************/ // Vector load/store static INLINE vdouble vload_vd_p(const double *ptr) { return svld1_f64(ptrue, ptr); } static INLINE vdouble vloadu_vd_p(const double *ptr) { return svld1_f64(ptrue, ptr); } static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { svst1_f64(ptrue, ptr, v); } static INLINE void vstoreu_v_p_vi(int *ptr, vint v) { svst1w_s64(ptrue, ptr, svreinterpret_s64_s32(v)); } static vint vloadu_vi_p(int32_t *p) { return svreinterpret_s32_s64(svld1uw_s64(ptrue, (uint32_t *)p)); } // Reinterpret static INLINE vdouble 
vreinterpret_vd_vm(vmask vm) {
  return svreinterpret_f64_s32(vm);
}
static INLINE vmask vreinterpret_vm_vd(vdouble vd) {
  return svreinterpret_s32_f64(vd);
}
static INLINE vdouble vreinterpret_vd_vi2(vint2 x) {
  return svreinterpret_f64_s32(x);
}
static INLINE vint2 vreinterpret_vi2_vd(vdouble x) {
  return svreinterpret_s32_f64(x);
}

// Shift every 64-bit lane left by 32 bits, moving its low 32-bit word into
// the high half.
static INLINE vint2 vcastu_vi2_vi(vint x) {
  return svreinterpret_s32_s64(
      svlsl_n_s64_x(ptrue, svreinterpret_s64_s32(x), 32));
}

// Shift every 64-bit lane right (logically) by 32 bits, moving its high
// 32-bit word into the low half. The logical-shift-right intrinsic is only
// provided for unsigned element types, so the lanes are viewed as u64 for
// the shift.
static INLINE vint vcastu_vi_vi2(vint2 x) {
  return svreinterpret_s32_u64(
      svlsr_n_u64_x(ptrue, svreinterpret_u64_s32(x), 32));
}

static INLINE vdouble vcast_vd_vi(vint vi) {
  return svcvt_f64_s32_x(ptrue, vi);
}

// Splat
static INLINE vdouble vcast_vd_d(double d) { return svdup_n_f64(d); }

// Conditional select: lanes where o is true take x, the others take y
static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) {
  return svsel_f64(o, x, y);
}

// Select between broadcast scalar constants: v1 where o is true, v0 elsewhere.
static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
  return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

// Two-level select: d0 where o0, else d1 where o1, else d2.
static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
  return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

// Three-level select: d0 where o0, else d1 where o1, else d2 where o2, else d3.
static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
  return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}

static INLINE vint vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) {
  return svsel_s32(o, x, y);
}

// truncate
static INLINE vdouble vtruncate_vd_vd(vdouble vd) {
  return svrintz_f64_x(ptrue, vd);
}
static INLINE vint vtruncate_vi_vd(vdouble vd) {
  return svcvt_s32_f64_x(ptrue, vd);
}

// rint: round to nearest with ties going to even (svrintn). svrinta would
// send ties away from zero instead.
static INLINE vint vrint_vi_vd(vdouble vd) {
  return svcvt_s32_f64_x(ptrue, svrintn_f64_x(ptrue, vd));
}
static INLINE vdouble vrint_vd_vd(vdouble vd) {
  return svrintn_f64_x(ptrue, vd);
}

// FP math operations
static INLINE vdouble vadd_vd_vd_vd(vdouble x, vdouble y) {
  return
svadd_f64_x(ptrue, x, y); } static INLINE vdouble vsub_vd_vd_vd(vdouble x, vdouble y) { return svsub_f64_x(ptrue, x, y); } static INLINE vdouble vneg_vd_vd(vdouble x) { return svneg_f64_x(ptrue, x); } static INLINE vdouble vmul_vd_vd_vd(vdouble x, vdouble y) { return svmul_f64_x(ptrue, x, y); } static INLINE vdouble vdiv_vd_vd_vd(vdouble x, vdouble y) { return svdiv_f64_x(ptrue, x, y); } static INLINE vdouble vrec_vd_vd(vdouble x) { return svdivr_n_f64_x(ptrue, x, 1.0); } static INLINE vdouble vsqrt_vd_vd(vdouble x) { return svsqrt_f64_x(ptrue, x); } static INLINE vdouble vabs_vd_vd(vdouble x) { return svabs_f64_x(ptrue, x); } static INLINE vdouble vmax_vd_vd_vd(vdouble x, vdouble y) { return svmax_f64_x(ptrue, x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return svmin_f64_x(ptrue, x, y); } // Multiply accumulate / subtract static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z = x*y + z return svmad_f64_x(ptrue, x, y, z); } static INLINE vdouble vfma_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z + x * y return svmad_f64_x(ptrue, x, y, z); } static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z - x * y return svmsb_f64_x(ptrue, x, y, z); } static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // x * y - z return svnmsb_f64_x(ptrue, x, y, z); } static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z = x * y - z return svnmsb_f64_x(ptrue, x, y, z); } // Float comparison static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return svcmplt_f64(ptrue, x, y); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return svcmpeq_f64(ptrue, x, y); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return svcmpgt_f64(ptrue, x, y); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return svcmpge_f64(ptrue, x, y); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return svcmpne_f64(ptrue, x, y); } static 
INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return svcmple_f64(ptrue, x, y); } // predicates static INLINE vopmask visnan_vo_vd(vdouble vd) { return svcmpne_f64(ptrue, vd, vd); } static INLINE vopmask visinf_vo_vd(vdouble vd) { return svcmpeq_n_f64(ptrue, svabs_f64_x(ptrue, vd), SLEEF_INFINITY); } static INLINE vopmask vispinf_vo_vd(vdouble vd) { return svcmpeq_n_f64(ptrue, vd, SLEEF_INFINITY); } static INLINE vopmask visminf_vo_vd(vdouble vd) { return svcmpeq_n_f64(ptrue, vd, -SLEEF_INFINITY); } // Comparing bit masks static INLINE vopmask veq64_vo_vm_vm(vmask x, vmask y) { return svcmpeq_s64(ptrue, svreinterpret_s64_s32(x), svreinterpret_s64_s32(y)); } // pure predicate operations static INLINE vopmask vcast_vo32_vo64(vopmask o) { return o; } static INLINE vopmask vcast_vo64_vo32(vopmask o) { return o; } // logical integer operations static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { // This needs to be a zeroing instruction because we need to make // sure that the inactive elements for the unpacked integers vector // are zero. 
return svand_s32_z(x, y, y); }
// Zero the lanes selected by x and keep y in the others.
static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) {
  return svsel_s32(x, ALL_FALSE_MASK, y);
}

// Shifts by an immediate count c.
#define vsra_vi_vi_i(x, c) svasr_n_s32_x(ptrue, x, c)
#define vsll_vi_vi_i(x, c) svlsl_n_s32_x(ptrue, x, c)
// The logical-shift-right intrinsic is only provided for unsigned element
// types, so the lanes are viewed as u32 for the shift (same form as
// vsrl_vi2_vi2_i).
#define vsrl_vi_vi_i(x, c) \
  svreinterpret_s32_u32(svlsr_n_u32_x(ptrue, svreinterpret_u32_s32(x), c))

static INLINE vint vand_vi_vi_vi(vint x, vint y) {
  return svand_s32_x(ptrue, x, y);
}
// vandnot(x, y) is (~x) & y; svbic(a, b) computes a & ~b, hence the swapped
// operand order.
static INLINE vint vandnot_vi_vi_vi(vint x, vint y) {
  return svbic_s32_x(ptrue, y, x);
}
static INLINE vint vxor_vi_vi_vi(vint x, vint y) {
  return sveor_s32_x(ptrue, x, y);
}

// integer math
static INLINE vint vadd_vi_vi_vi(vint x, vint y) {
  return svadd_s32_x(ptrue, x, y);
}
static INLINE vint vsub_vi_vi_vi(vint x, vint y) {
  return svsub_s32_x(ptrue, x, y);
}
static INLINE vint vneg_vi_vi(vint x) { return svneg_s32_x(ptrue, x); }

// integer comparison
static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) {
  return svcmpgt_s32(ptrue, x, y);
}
static INLINE vopmask veq_vo_vi_vi(vint x, vint y) {
  return svcmpeq_s32(ptrue, x, y);
}

// Splat
static INLINE vint vcast_vi_i(int i) { return svdup_n_s32(i); }

// bitmask logical operations
static INLINE vmask vand_vm_vo64_vm(vopmask x, vmask y) {
  // This needs to be a zeroing instruction because we need to make
  // sure that the inactive elements for the unpacked integers vector
  // are zero.
return svreinterpret_s32_s64( svand_s64_z(x, svreinterpret_s64_s32(y), svreinterpret_s64_s32(y))); } static INLINE vmask vandnot_vm_vo64_vm(vopmask x, vmask y) { return svreinterpret_s32_s64(svsel_s64( x, svreinterpret_s64_s32(ALL_FALSE_MASK), svreinterpret_s64_s32(y))); } static INLINE vmask vor_vm_vo64_vm(vopmask x, vmask y) { return svreinterpret_s32_s64(svsel_s64( x, svreinterpret_s64_s32(ALL_TRUE_MASK), svreinterpret_s64_s32(y))); } static INLINE vfloat vrev21_vf_vf(vfloat vf) { return svreinterpret_f32_u64(svrevw_u64_x(ptrue, svreinterpret_u64_f32(vf))); } // Comparison returning integer static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return svsel_s32(svcmpeq_s32(ptrue, x, y), ALL_TRUE_MASK, ALL_FALSE_MASK); } // Gather static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { return svld1_gather_s64index_f64(ptrue, ptr, svreinterpret_s64_s32(vi)); } static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { return svld1_gather_s32index_f32(ptrue, ptr, vi2); } // Operations for DFT static INLINE vdouble vposneg_vd_vd(vdouble d) { return svneg_f64_m(d, svdupq_n_b64(false, true), d); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return svneg_f64_m(d, svdupq_n_b64(true, false), d); } static INLINE vfloat vposneg_vf_vf(vfloat d) { return svneg_f32_m(d, svdupq_n_b32(false, true, false, true), d); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return svneg_f32_m(d, svdupq_n_b32(true, false, true, false), d); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat d0, vfloat d1) { return vadd_vf_vf_vf(d0, vnegpos_vf_vf(d1)); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { return vfma_vd_vd_vd_vd(x, y, vnegpos_vd_vd(z)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vfma_vf_vf_vf_vf(x, y, vnegpos_vf_vf(z)); } // static INLINE vdouble vrev21_vd_vd(vdouble x) { return 
svzip1_f64(svuzp2_f64(x, x), svuzp1_f64(x, x)); } static INLINE vdouble vreva2_vd_vd(vdouble vd) { svint64_t x = svindex_s64((VECTLENDP-1), -1); x = svzip1_s64(svuzp2_s64(x, x), svuzp1_s64(x, x)); return svtbl_f64(vd, svreinterpret_u64_s64(x)); } static INLINE vfloat vreva2_vf_vf(vfloat vf) { svint32_t x = svindex_s32((VECTLENSP-1), -1); x = svzip1_s32(svuzp2_s32(x, x), svuzp1_s32(x, x)); return svtbl_f32(vf, svreinterpret_u32_s32(x)); } // static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { svst1_scatter_u64index_f64(ptrue, ptr + offset*2, svzip1_u64(svindex_u64(0, step*2), svindex_u64(1, step*2)), v); } static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { svst1_scatter_u32index_f32(ptrue, ptr + offset*2, svzip1_u32(svindex_u32(0, step*2), svindex_u32(1, step*2)), v); } static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { vstoreu_v_p_vd(ptr, v); } static INLINE void vstream_v_p_vd(double *ptr, vdouble v) { vstore_v_p_vd(ptr, v); } static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vstoreu_v_p_vf(ptr, v); } static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vscatter2_v_p_i_i_vd(ptr, offset, step, v); } static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } // These functions are for debugging static double vcast_d_vd(vdouble v) { double a[svcntd()]; vstoreu_v_p_vd(a, v); return a[0]; } static float vcast_f_vf(vfloat v) { float a[svcntw()]; vstoreu_v_p_vf(a, v); return a[0]; } static int vcast_i_vi(vint v) { int a[svcntw()]; vstoreu_v_p_vi(a, v); return a[0]; } static int vcast_i_vi2(vint2 v) { int a[svcntw()]; vstoreu_v_p_vi2(a, v); return a[0]; } sleef-3.3.1/src/arch/helpervecext.h000066400000000000000000001062541333715643700172130ustar00rootroot00000000000000// Copyright Naoki Shibata 
2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include "misc.h" #ifndef CONFIG #error CONFIG macro not defined #endif #define ENABLE_DP #define ENABLE_SP #define LOG2VECTLENDP CONFIG #define VECTLENDP (1 << LOG2VECTLENDP) #define LOG2VECTLENSP (LOG2VECTLENDP+1) #define VECTLENSP (1 << LOG2VECTLENSP) #define DFTPRIORITY LOG2VECTLENDP #if defined(__clang__) #define ISANAME "Clang Vector Extension" typedef uint32_t vmask __attribute__((ext_vector_type(VECTLENDP*2))); typedef uint32_t vopmask __attribute__((ext_vector_type(VECTLENDP*2))); typedef double vdouble __attribute__((ext_vector_type(VECTLENDP))); typedef int32_t vint __attribute__((ext_vector_type(VECTLENDP))); typedef float vfloat __attribute__((ext_vector_type(VECTLENDP*2))); typedef int32_t vint2 __attribute__((ext_vector_type(VECTLENDP*2))); #ifdef ENABLE_LONGDOUBLE typedef uint8_t vmaskl __attribute__((ext_vector_type(sizeof(long double)*VECTLENDP))); typedef long double vlongdouble __attribute__((ext_vector_type(VECTLENDP))); #endif #ifdef Sleef_quad2_DEFINED typedef uint8_t vmaskq __attribute__((ext_vector_type(sizeof(Sleef_quad)*VECTLENDP))); #ifdef ENABLE_LONGDOUBLE typedef Sleef_quad vquad __attribute__((ext_vector_type(VECTLENDP))); #endif #endif #elif defined(__GNUC__) #define ISANAME "GCC Vector Extension" typedef uint32_t vmask __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP*2))); typedef uint32_t vopmask __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP*2))); typedef double vdouble __attribute__((vector_size(sizeof(double)*VECTLENDP))); typedef int32_t vint __attribute__((vector_size(sizeof(int32_t)*VECTLENDP))); typedef float vfloat __attribute__((vector_size(sizeof(float)*VECTLENDP*2))); typedef int32_t vint2 __attribute__((vector_size(sizeof(int32_t)*VECTLENDP*2))); #ifdef ENABLE_LONGDOUBLE typedef uint8_t vmaskl __attribute__((vector_size(sizeof(long 
double)*VECTLENDP))); typedef long double vlongdouble __attribute__((vector_size(sizeof(long double)*VECTLENDP))); #endif #ifdef Sleef_quad2_DEFINED typedef uint8_t vmaskq __attribute__((vector_size(sizeof(Sleef_quad)*VECTLENDP))); typedef Sleef_quad vquad __attribute__((vector_size(sizeof(Sleef_quad)*VECTLENDP))); #endif #endif // #if VECTLENDP == 2 static INLINE vopmask vcast_vo32_vo64(vopmask m) { return (vopmask){ m[1], m[3], 0, 0 }; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return (vopmask){ m[0], m[0], m[1], m[1] }; } static INLINE vint vcast_vi_i(int i) { return (vint) { i, i }; } static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i }; } static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f }; } static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d }; } #endif #ifdef Sleef_quad2_DEFINED static INLINE vquad vcast_vq_q(Sleef_quad d) { return (vquad) { d, d }; } #endif static INLINE vmask vcast_vm_i_i(int h, int l) { return (vmask){ l, h, l, h }; } static INLINE vint2 vcastu_vi2_vi(vint vi) { return (vint2){ 0, vi[0], 0, vi[1] }; } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return (vint){ vi2[1], vi2[3] }; } static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1] }; } static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], 0, 0 }; } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return (vdouble) { vd[1], vd[0] }; } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return vd; } static INLINE vfloat vrev21_vf_vf(vfloat vd) { return (vfloat) { vd[1], vd[0], vd[3], vd[2] }; } static INLINE vfloat vreva2_vf_vf(vfloat vd) { return (vfloat) { vd[2], vd[3], vd[0], vd[1] }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vrev21_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[1], vd[0] }; } static INLINE vlongdouble 
vreva2_vl_vl(vlongdouble vd) { return vd; } static INLINE vlongdouble vposneg_vl_vl(vlongdouble vd) { return (vlongdouble) { +vd[0], -vd[1] }; } static INLINE vlongdouble vnegpos_vl_vl(vlongdouble vd) { return (vlongdouble) { -vd[0], +vd[1] }; } #endif #ifdef Sleef_quad2_DEFINED static INLINE vquad vrev21_vq_vq(vquad vd) { return (vquad) { vd[1], vd[0] }; } static INLINE vquad vreva2_vq_vq(vquad vd) { return vd; } static INLINE vquad vposneg_vq_vq(vquad vd) { return (vquad) { +vd[0], -vd[1] }; } static INLINE vquad vnegpos_vq_vq(vquad vd) { return (vquad) { -vd[0], +vd[1] }; } #endif #define PNMASK ((vdouble) { +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0 }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)PNMASK); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)NPMASK); } #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)PNMASKf); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)NPMASKf); } #elif VECTLENDP == 4 static INLINE vopmask vcast_vo32_vo64(vopmask m) { return (vopmask){ m[1], m[3], m[5], m[7], 0, 0, 0, 0 }; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return (vopmask){ m[0], m[0], m[1], m[1], m[2], m[2], m[3], m[3] }; } static INLINE vint vcast_vi_i(int i) { return (vint) { i, i, i, i }; } static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i, i, i, i, i }; } static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f, f, f, f, f }; } static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d, d, d }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d, d, d }; } #endif static INLINE vmask vcast_vm_i_i(int h, int l) { return (vmask){ l, h, l, h, l, h, l, h }; } static INLINE vint2 
vcastu_vi2_vi(vint vi) { return (vint2){ 0, vi[0], 0, vi[1], 0, vi[2], 0, vi[3] }; } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return (vint){ vi2[1], vi2[3], vi2[5], vi2[7] }; } static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1], vi2[2], vi2[3] }; } static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], vi[2], vi[3], 0, 0, 0, 0 }; } #define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0 }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)PNMASK); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)NPMASK); } #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)PNMASKf); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)NPMASKf); } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return (vdouble) { vd[1], vd[0], vd[3], vd[2] }; } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return (vdouble) { vd[2], vd[3], vd[0], vd[1] }; } static INLINE vfloat vrev21_vf_vf(vfloat vd) { return (vfloat) { vd[1], vd[0], vd[3], vd[2], vd[5], vd[4], vd[7], vd[6] }; } static INLINE vfloat vreva2_vf_vf(vfloat vd) { return (vfloat) { vd[6], vd[7], vd[4], vd[5], vd[2], vd[3], vd[0], vd[1] }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vrev21_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[1], vd[0], vd[3], vd[2] }; } static INLINE vlongdouble vreva2_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[2], vd[3], vd[0], vd[1] }; } static INLINE vlongdouble vposneg_vl_vl(vlongdouble vd) { return (vlongdouble) { +vd[0], -vd[1], +vd[2], -vd[3] }; } static INLINE vlongdouble vnegpos_vl_vl(vlongdouble vd) { return (vlongdouble) { -vd[0], +vd[1], -vd[2], +vd[3] 
}; } #endif #elif VECTLENDP == 8 static INLINE vopmask vcast_vo32_vo64(vopmask m) { return (vopmask){ m[1], m[3], m[5], m[7], m[9], m[11], m[13], m[15], 0, 0, 0, 0, 0, 0, 0, 0 }; } static INLINE vopmask vcast_vo64_vo32(vopmask m) { return (vopmask){ m[0], m[0], m[1], m[1], m[2], m[2], m[3], m[3], m[4], m[4], m[5], m[5], m[6], m[6], m[7], m[7] }; } static INLINE vint vcast_vi_i(int i) { return (vint) { i, i, i, i, i, i, i, i }; } static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i }; } static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f, f, f, f, f, f, f, f, f, f, f, f, f }; } static INLINE vdouble vcast_vd_d(double d) { return (vdouble) { d, d, d, d, d, d, d, d }; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vcast_vl_l(long double d) { return (vlongdouble) { d, d, d, d, d, d, d, d }; } #endif static INLINE vmask vcast_vm_i_i(int h, int l) { return (vmask){ l, h, l, h, l, h, l, h, l, h, l, h, l, h, l, h }; } static INLINE vint2 vcastu_vi2_vi(vint vi) { return (vint2){ 0, vi[0], 0, vi[1], 0, vi[2], 0, vi[3], 0, vi[4], 0, vi[5], 0, vi[6], 0, vi[7] }; } static INLINE vint vcastu_vi_vi2(vint2 vi2) { return (vint){ vi2[1], vi2[3], vi2[5], vi2[7], vi2[9], vi2[11], vi2[13], vi2[15] }; } static INLINE vint vreinterpretFirstHalf_vi_vi2(vint2 vi2) { return (vint){ vi2[0], vi2[1], vi2[2], vi2[3], vi2[4], vi2[5], vi2[6], vi2[7] }; } static INLINE vint2 vreinterpretFirstHalf_vi2_vi(vint vi) { return (vint2){ vi[0], vi[1], vi[2], vi[3], vi[4], vi[5], vi[6], vi[7], 0, 0, 0, 0, 0, 0, 0, 0 }; } #define PNMASK ((vdouble) { +0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0 }) #define NPMASK ((vdouble) { -0.0, +0.0, -0.0, +0.0, -0.0, +0.0, -0.0, +0.0 }) static INLINE vdouble vposneg_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)PNMASK); } static INLINE vdouble vnegpos_vd_vd(vdouble d) { return (vdouble)((vmask)d ^ (vmask)NPMASK); } #define PNMASKf ((vfloat) { +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, 
+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f }) #define NPMASKf ((vfloat) { -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f }) static INLINE vfloat vposneg_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)PNMASKf); } static INLINE vfloat vnegpos_vf_vf(vfloat d) { return (vfloat)((vmask)d ^ (vmask)NPMASKf); } static INLINE vdouble vrev21_vd_vd(vdouble vd) { return (vdouble) { vd[1], vd[0], vd[3], vd[2], vd[5], vd[4], vd[7], vd[6] }; } static INLINE vdouble vreva2_vd_vd(vdouble vd) { return (vdouble) { vd[6], vd[7], vd[4], vd[5], vd[2], vd[3], vd[0], vd[1] }; } static INLINE vfloat vrev21_vf_vf(vfloat vd) { return (vfloat) { vd[1], vd[0], vd[3], vd[2], vd[5], vd[4], vd[7], vd[6], vd[9], vd[8], vd[11], vd[10], vd[13], vd[12], vd[15], vd[14] }; } static INLINE vfloat vreva2_vf_vf(vfloat vd) { return (vfloat) { vd[14], vd[15], vd[12], vd[13], vd[10], vd[11], vd[8], vd[9], vd[6], vd[7], vd[4], vd[5], vd[2], vd[3], vd[0], vd[1]}; } #ifdef ENABLE_LONGDOUBLE static INLINE vlongdouble vrev21_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[1], vd[0], vd[3], vd[2], vd[5], vd[4], vd[7], vd[6] }; } static INLINE vlongdouble vreva2_vl_vl(vlongdouble vd) { return (vlongdouble) { vd[6], vd[7], vd[4], vd[5], vd[2], vd[3], vd[0], vd[1] }; } static INLINE vlongdouble vposneg_vl_vl(vlongdouble vd) { return (vlongdouble) { +vd[0], -vd[1], +vd[2], -vd[3], +vd[4], -vd[5], +vd[6], -vd[7] }; } static INLINE vlongdouble vnegpos_vl_vl(vlongdouble vd) { return (vlongdouble) { -vd[0], +vd[1], -vd[2], +vd[3], -vd[4], +vd[5], -vd[6], +vd[7] }; } #endif #else static INLINE vint vcast_vi_i(int k) { vint ret; for(int i=0;i y), x, y); } static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) { return vsel_vd_vo_vd_vd((vopmask)(x < y), x, y); } static INLINE vdouble vsubadd_vd_vd_vd(vdouble x, vdouble y) { return vadd_vd_vd_vd(x, vnegpos_vd_vd(y)); } static INLINE vdouble vmlsubadd_vd_vd_vd_vd(vdouble x, 
vdouble y, vdouble z) { return vsubadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z); } static INLINE vopmask veq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x == y); } static INLINE vopmask vneq_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x != y); } static INLINE vopmask vlt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x < y); } static INLINE vopmask vle_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x <= y); } static INLINE vopmask vgt_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x > y); } static INLINE vopmask vge_vo_vd_vd(vdouble x, vdouble y) { return (vopmask)(x >= y); } static INLINE vint vadd_vi_vi_vi(vint x, vint y) { return x + y; } static INLINE vint vsub_vi_vi_vi(vint x, vint y) { return x - y; } static INLINE vint vneg_vi_vi(vint e) { return -e; } static INLINE vint vand_vi_vi_vi(vint x, vint y) { return x & y; } static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { return y & ~x; } static INLINE vint vor_vi_vi_vi(vint x, vint y) { return x | y; } static INLINE vint vxor_vi_vi_vi(vint x, vint y) { return x ^ y; } static INLINE vint vand_vi_vo_vi(vopmask x, vint y) { return vreinterpretFirstHalf_vi_vi2((vint2)x) & y; } static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { return y & ~vreinterpretFirstHalf_vi_vi2((vint2)x); } static INLINE vint vsll_vi_vi_i(vint x, int c) { #if defined(__clang__) typedef uint32_t vu __attribute__((ext_vector_type(VECTLENDP))); #else typedef uint32_t vu __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP))); #endif return (vint)(((vu)x) << c); } static INLINE vint vsrl_vi_vi_i(vint x, int c) { #if defined(__clang__) typedef uint32_t vu __attribute__((ext_vector_type(VECTLENDP))); #else typedef uint32_t vu __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP))); #endif return (vint)(((vu)x) >> c); } static INLINE vint vsra_vi_vi_i(vint x, int c) { return x >> c; } static INLINE vint veq_vi_vi_vi(vint x, vint y) { return x == y; } static INLINE vint vgt_vi_vi_vi(vint x, vint y) { return x > y; } static 
INLINE vopmask veq_vo_vi_vi(vint x, vint y) { return (vopmask)vreinterpretFirstHalf_vi2_vi(x == y); } static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return (vopmask)vreinterpretFirstHalf_vi2_vi(x > y);} static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return vor_vi_vi_vi(vand_vi_vi_vi(vreinterpretFirstHalf_vi_vi2((vint2)m), x), vandnot_vi_vi_vi(vreinterpretFirstHalf_vi_vi2((vint2)m), y)); } static INLINE vopmask visinf_vo_vd(vdouble d) { return (vopmask)(vabs_vd_vd(d) == SLEEF_INFINITY); } static INLINE vopmask vispinf_vo_vd(vdouble d) { return (vopmask)(d == SLEEF_INFINITY); } static INLINE vopmask visminf_vo_vd(vdouble d) { return (vopmask)(d == -SLEEF_INFINITY); } static INLINE vopmask visnan_vo_vd(vdouble d) { return (vopmask)(d != d); } static INLINE vdouble vsqrt_vd_vd(vdouble d) { #if defined(__clang__) typedef int64_t vi64 __attribute__((ext_vector_type(VECTLENDP))); #else typedef int64_t vi64 __attribute__((vector_size(sizeof(int64_t)*VECTLENDP))); #endif vdouble q = vcast_vd_d(1); vopmask o = (vopmask)(d < 8.636168555094445E-78); d = (vdouble)((o & (vmask)(d * 1.157920892373162E77)) | (~o & (vmask)d)); q = (vdouble)((o & (vmask)vcast_vd_d(2.9387358770557188E-39)) | (~o & (vmask)vcast_vd_d(1))); q = (vdouble)vor_vm_vm_vm(vlt_vo_vd_vd(d, vcast_vd_d(0)), (vmask)q); vdouble x = (vdouble)(0x5fe6ec85e7de30daLL - ((vi64)(d + 1e-320) >> 1)); x = x * ( 3 - d * x * x); x = x * ( 12 - d * x * x); x = x * (768 - d * x * x); x *= 1.0 / (1 << 13); x = (d - (d * x) * (d * x)) * (x * 0.5) + d * x; return x * q; } static INLINE double vcast_d_vd(vdouble v) { return v[0]; } static INLINE float vcast_f_vf(vfloat v) { return v[0]; } static INLINE vdouble vload_vd_p(const double *ptr) { return *(vdouble *)ptr; } static INLINE vdouble vloadu_vd_p(const double *ptr) { vdouble vd; for(int i=0;i y), x, y); } static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) { return vsel_vf_vo_vf_vf((vopmask)(x < y), x, y); } static INLINE vfloat vsubadd_vf_vf_vf(vfloat 
x, vfloat y) { return vadd_vf_vf_vf(x, vnegpos_vf_vf(y)); } static INLINE vfloat vmlsubadd_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) { return vsubadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z); } static INLINE vopmask veq_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x == y); } static INLINE vopmask vneq_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x != y); } static INLINE vopmask vlt_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x < y); } static INLINE vopmask vle_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x <= y); } static INLINE vopmask vgt_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x > y); } static INLINE vopmask vge_vo_vf_vf(vfloat x, vfloat y) { return (vopmask)(x >= y); } static INLINE vint2 vadd_vi2_vi2_vi2(vint2 x, vint2 y) { return x + y; } static INLINE vint2 vsub_vi2_vi2_vi2(vint2 x, vint2 y) { return x - y; } static INLINE vint2 vneg_vi2_vi2(vint2 e) { return -e; } static INLINE vint2 vand_vi2_vi2_vi2(vint2 x, vint2 y) { return x & y; } static INLINE vint2 vandnot_vi2_vi2_vi2(vint2 x, vint2 y) { return y & ~x; } static INLINE vint2 vor_vi2_vi2_vi2(vint2 x, vint2 y) { return x | y; } static INLINE vint2 vxor_vi2_vi2_vi2(vint2 x, vint2 y) { return x ^ y; } static INLINE vint2 vand_vi2_vo_vi2(vopmask x, vint2 y) { return (vint2)x & y; } static INLINE vint2 vandnot_vi2_vo_vi2(vopmask x, vint2 y) { return y & ~(vint2)x; } static INLINE vint2 vsll_vi2_vi2_i(vint2 x, int c) { #if defined(__clang__) typedef uint32_t vu __attribute__((ext_vector_type(VECTLENDP*2))); #else typedef uint32_t vu __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP*2))); #endif return (vint2)(((vu)x) << c); } static INLINE vint2 vsrl_vi2_vi2_i(vint2 x, int c) { #if defined(__clang__) typedef uint32_t vu __attribute__((ext_vector_type(VECTLENDP*2))); #else typedef uint32_t vu __attribute__((vector_size(sizeof(uint32_t)*VECTLENDP*2))); #endif return (vint2)(((vu)x) >> c); } static INLINE vint2 vsra_vi2_vi2_i(vint2 x, int c) { return x >> c; } static INLINE vopmask 
veq_vo_vi2_vi2(vint2 x, vint2 y) { return (vopmask)(x == y); } static INLINE vopmask vgt_vo_vi2_vi2(vint2 x, vint2 y) { return (vopmask)(x > y); } static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { return x == y; } static INLINE vint2 vgt_vi2_vi2_vi2(vint2 x, vint2 y) { return x > y; } static INLINE vopmask visinf_vo_vf(vfloat d) { return (vopmask)(vabs_vf_vf(d) == SLEEF_INFINITYf); } static INLINE vopmask vispinf_vo_vf(vfloat d) { return (vopmask)(d == SLEEF_INFINITYf); } static INLINE vopmask visminf_vo_vf(vfloat d) { return (vopmask)(d == -SLEEF_INFINITYf); } static INLINE vopmask visnan_vo_vf(vfloat d) { return (vopmask)(d != d); } static INLINE vfloat vsqrt_vf_vf(vfloat d) { vfloat q = vcast_vf_f(1); vopmask o = (vopmask)(d < 5.4210108624275221700372640043497e-20f); // 2^-64 d = (vfloat)((o & (vmask)(d * vcast_vf_f(18446744073709551616.0f))) | (~o & (vmask)d)); // 2^64 q = (vfloat)((o & (vmask)vcast_vf_f(0.00000000023283064365386962890625f)) | (~o & (vmask)vcast_vf_f(1))); // 2^-32 q = (vfloat)vor_vm_vm_vm(vlt_vo_vf_vf(d, vcast_vf_f(0)), (vmask)q); vfloat x = (vfloat)(0x5f330de2 - (((vint2)d) >> 1)); x = x * ( 3.0f - d * x * x); x = x * (12.0f - d * x * x); x *= 0.0625f; x = (d - (d * x) * (d * x)) * (x * 0.5) + d * x; return x * q; } static INLINE vfloat vload_vf_p(const float *ptr) { return *(vfloat *)ptr; } static INLINE vfloat vloadu_vf_p(const float *ptr) { vfloat vf; for(int i=0;i #include #include #include #include #include #include // #if !(defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER)) #include #include #include static void FLOCK(FILE *fp) { flock(fileno(fp), LOCK_EX); } static void FUNLOCK(FILE *fp) { flock(fileno(fp), LOCK_UN); } static void FTRUNCATE(FILE *fp, off_t z) { if (ftruncate(fileno(fp), z)) ; } static FILE *OPENTMPFILE() { return tmpfile(); } static void CLOSETMPFILE(FILE *fp) { fclose(fp); } #else #include static void FLOCK(FILE *fp) { } static void FUNLOCK(FILE *fp) { } static void FTRUNCATE(FILE *fp, long z) { 
fseek(fp, 0, SEEK_SET); SetEndOfFile((HANDLE)_get_osfhandle(_fileno(fp))); } static FILE *OPENTMPFILE() { return fopen("tmpfile.txt", "w+"); } static void CLOSETMPFILE(FILE *fp) { fclose(fp); remove("tmpfile.txt"); } #endif // #define MAGIC_ARRAYMAPNODE 0xf73130fa #define MAGIC_ARRAYMAP 0x8693bd21 #define LOGNBUCKETS 8 #define NBUCKETS (1 << LOGNBUCKETS) static int hash(uint64_t key) { return (key ^ (key >> LOGNBUCKETS) ^ (key >> (LOGNBUCKETS*2)) ^ (key >> (LOGNBUCKETS*3))) & (NBUCKETS-1); } static void String_trim(char *str) { char *dst = str, *src = str, *pterm = src; while(*src != '\0' && isspace(*src)) src++; for(;*src != '\0';src++) { *dst++ = *src; if (!isspace(*src)) pterm = dst; } *pterm = '\0'; } typedef struct ArrayMapNode { uint32_t magic; uint64_t key; void *value; } ArrayMapNode; typedef struct ArrayMap { uint32_t magic; ArrayMapNode *array[NBUCKETS]; int size[NBUCKETS], capacity[NBUCKETS], totalSize; } ArrayMap; ArrayMap *initArrayMap() { ArrayMap *thiz = (ArrayMap *)calloc(1, sizeof(ArrayMap)); thiz->magic = MAGIC_ARRAYMAP; for(int i=0;icapacity[i] = 8; thiz->array[i] = (ArrayMapNode *)malloc(thiz->capacity[i] * sizeof(ArrayMapNode)); thiz->size[i] = 0; } thiz->totalSize = 0; return thiz; } void ArrayMap_dispose(ArrayMap *thiz) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); for(int j=0;jsize[j];i++) { assert(thiz->array[j][i].magic == MAGIC_ARRAYMAPNODE); thiz->array[j][i].magic = 0; } free(thiz->array[j]); } thiz->magic = 0; free(thiz); } int ArrayMap_size(ArrayMap *thiz) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); return thiz->totalSize; } uint64_t *ArrayMap_keyArray(ArrayMap *thiz) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); uint64_t *a = (uint64_t *)malloc(sizeof(uint64_t) * thiz->totalSize); int p = 0; for(int j=0;jsize[j];i++) { assert(thiz->array[j][i].magic == MAGIC_ARRAYMAPNODE); a[p++] = thiz->array[j][i].key; } } return a; } void **ArrayMap_valueArray(ArrayMap *thiz) { assert(thiz != NULL && thiz->magic 
== MAGIC_ARRAYMAP); void **a = (void **)malloc(sizeof(void *) * thiz->totalSize); int p = 0; for(int j=0;jsize[j];i++) { assert(thiz->array[j][i].magic == MAGIC_ARRAYMAPNODE); a[p++] = thiz->array[j][i].value; } } return a; } void *ArrayMap_remove(ArrayMap *thiz, uint64_t key) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); int h = hash(key); for(int i=0;isize[h];i++) { assert(thiz->array[h][i].magic == MAGIC_ARRAYMAPNODE); if (thiz->array[h][i].key == key) { void *old = thiz->array[h][i].value; thiz->array[h][i].key = thiz->array[h][thiz->size[h]-1].key; thiz->array[h][i].value = thiz->array[h][thiz->size[h]-1].value; thiz->array[h][thiz->size[h]-1].magic = 0; thiz->size[h]--; thiz->totalSize--; return old; } } return NULL; } void *ArrayMap_put(ArrayMap *thiz, uint64_t key, void *value) { if (value == NULL) return ArrayMap_remove(thiz, key); assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); int h = hash(key); for(int i=0;isize[h];i++) { assert(thiz->array[h][i].magic == MAGIC_ARRAYMAPNODE); if (thiz->array[h][i].key == key) { void *old = thiz->array[h][i].value; thiz->array[h][i].value = value; return old; } } if (thiz->size[h] >= thiz->capacity[h]) { thiz->capacity[h] *= 2; thiz->array[h] = (ArrayMapNode *)realloc(thiz->array[h], thiz->capacity[h] * sizeof(ArrayMapNode)); } ArrayMapNode *n = &(thiz->array[h][thiz->size[h]++]); n->magic = MAGIC_ARRAYMAPNODE; n->key = key; n->value = value; thiz->totalSize++; return NULL; } void *ArrayMap_get(ArrayMap *thiz, uint64_t key) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); int h = hash(key); for(int i=0;isize[h];i++) { assert(thiz->array[h][i].magic == MAGIC_ARRAYMAPNODE); if (thiz->array[h][i].key == key) { return thiz->array[h][i].value; } } return NULL; } #define LINELEN (1024*1024) ArrayMap *ArrayMap_load(const char *fn, const char *prefix, const char *idstr, int doLock) { const int idstrlen = strlen(idstr); int prefixLen = strlen(prefix) + 3; if (prefixLen >= LINELEN-10 || idstrlen >= 
LINELEN-10) return NULL; FILE *fp = fopen(fn, "r"); if (fp == NULL) return NULL; if (doLock) FLOCK(fp); ArrayMap *thiz = initArrayMap(); char *prefix2 = malloc(prefixLen+10); strcpy(prefix2, prefix); String_trim(prefix2); for(char *p = prefix2;*p != '\0';p++) { if (*p == ':') *p = ';'; if (*p == ' ') *p = '_'; } strcat(prefix2, " : "); prefixLen = strlen(prefix2); char *line = malloc(sizeof(char) * (LINELEN+10)); line[idstrlen] = '\0'; if (fread(line, sizeof(char), idstrlen, fp) != idstrlen || strcmp(idstr, line) != 0) { if (doLock) FUNLOCK(fp); fclose(fp); free(prefix2); free(line); return NULL; } int found = 0; for(;;) { line[LINELEN] = '\0'; if (fgets(line, LINELEN, fp) == NULL) break; if (strncmp(line, prefix2, prefixLen) != 0) continue; uint64_t key; char *value = malloc(sizeof(char) * LINELEN); if (sscanf(line + prefixLen, "%" SCNx64 " : %s\n", &key, value) == 2) { found = 1; ArrayMap_put(thiz, (uint64_t)key, (void *)value); } else { free(value); } } if (doLock) FUNLOCK(fp); fclose(fp); free(prefix2); free(line); return thiz; } int ArrayMap_save(ArrayMap *thiz, const char *fn, const char *prefix, const char *idstr) { assert(thiz != NULL && thiz->magic == MAGIC_ARRAYMAP); const int idstrlen = strlen(idstr); int prefixLen = strlen(prefix) + 3; if (prefixLen >= LINELEN-10 || idstrlen >= LINELEN-10) return -1; // Generate prefix2 char *prefix2 = malloc(prefixLen+10); strcpy(prefix2, prefix); String_trim(prefix2); for(char *p = prefix2;*p != '\0';p++) { if (*p == ':') *p = ';'; if (*p == ' ') *p = '_'; } strcat(prefix2, " : "); prefixLen = strlen(prefix2); // FILE *fp = fopen(fn, "a+"); if (fp == NULL) return -1; FLOCK(fp); fseek(fp, 0, SEEK_SET); // Copy the file specified by fn to tmpfile FILE *tmpfp = OPENTMPFILE(); if (tmpfp == NULL) { FUNLOCK(fp); fclose(fp); return -1; } char *line = malloc(sizeof(char) * (LINELEN+10)); line[idstrlen] = '\0'; if (fread(line, sizeof(char), idstrlen, fp) == idstrlen && strcmp(idstr, line) == 0) { for(;;) { line[LINELEN] = 
'\0'; if (fgets(line, LINELEN, fp) == NULL) break; if (strncmp(line, prefix2, prefixLen) != 0) fputs(line, tmpfp); } } // Write the contents in the map into tmpfile uint64_t *keys = ArrayMap_keyArray(thiz); int s = ArrayMap_size(thiz); for(int i=0;i= LINELEN-10) continue; fprintf(tmpfp, "%s %" PRIx64 " : %s\n", prefix2, keys[i], value); } free(keys); fseek(fp, 0, SEEK_SET); FTRUNCATE(fp, 0); fwrite(idstr, sizeof(char), strlen(idstr), fp); fseek(tmpfp, 0, SEEK_SET); for(;;) { size_t s = fread(line, 1, LINELEN, tmpfp); if (s == 0) break; fwrite(line, 1, s, fp); } FUNLOCK(fp); fclose(fp); CLOSETMPFILE(tmpfp); free(prefix2); free(line); return 0; } sleef-3.3.1/src/common/arraymap.h000066400000000000000000000015101333715643700166710ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifndef __ARRAYMAP_H__ #define __ARRAYMAP_H__ typedef struct ArrayMap ArrayMap; ArrayMap *initArrayMap(); void ArrayMap_dispose(ArrayMap *thiz); int ArrayMap_size(ArrayMap *thiz); void *ArrayMap_remove(ArrayMap *thiz, uint64_t key); void *ArrayMap_put(ArrayMap *thiz, uint64_t key, void *value); void *ArrayMap_get(ArrayMap *thiz, uint64_t key); uint64_t *ArrayMap_keyArray(ArrayMap *thiz); void **ArrayMap_valueArray(ArrayMap *thiz); int ArrayMap_save(ArrayMap *thiz, const char *fn, const char *prefix, const char *idstr); ArrayMap *ArrayMap_load(const char *fn, const char *prefix, const char *idstr, int doLock); #endif sleef-3.3.1/src/common/common.c000066400000000000000000000050211333715643700163410ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "misc.h" #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #include EXPORT void *Sleef_malloc(size_t z) { return _aligned_malloc(z, 256); } EXPORT void Sleef_free(void *ptr) { _aligned_free(ptr); } EXPORT uint64_t Sleef_currentTimeMicros() { struct __timeb64 t; _ftime64(&t); return t.time * 1000000LL + t.millitm*1000; } #elif defined(__APPLE__) #include EXPORT void *Sleef_malloc(size_t z) { void *ptr = NULL; posix_memalign(&ptr, 256, z); return ptr; } EXPORT void Sleef_free(void *ptr) { free(ptr); } EXPORT uint64_t Sleef_currentTimeMicros() { struct timeval time; gettimeofday(&time, NULL); return (uint64_t)((time.tv_sec * 1000000LL) + time.tv_usec); } #else // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #include #include #ifdef __FreeBSD__ #include #else #include #endif EXPORT void *Sleef_malloc(size_t z) { void *ptr = NULL; posix_memalign(&ptr, 4096, z); return ptr; } EXPORT void Sleef_free(void *ptr) { free(ptr); } EXPORT uint64_t Sleef_currentTimeMicros() { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (uint64_t)tp.tv_sec * 1000000LL + ((uint64_t)tp.tv_nsec/1000); } #endif // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #ifdef _MSC_VER #include EXPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { __cpuidex(out, eax, ecx); } #else #if defined(__x86_64__) || defined(__i386__) EXPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { uint32_t a, b, c, d; __asm__ __volatile__ ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (eax), "c"(ecx)); out[0] = a; out[1] = b; out[2] = c; out[3] = d; } #endif #endif #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) static char x86BrandString[256]; EXPORT char *Sleef_getCpuIdString() { union { int32_t info[4]; uint8_t str[16]; } u; int i,j; char *p; p 
= x86BrandString; for(i=0;i<3;i++) { Sleef_x86CpuID(u.info, i + 0x80000002, 0); for(j=0;j<16;j++) { *p++ = u.str[j]; } } *p++ = '\n'; return x86BrandString; } #else EXPORT char *Sleef_getCpuIdString() { return "Unknown architecture"; } #endif sleef-3.3.1/src/common/common.h000066400000000000000000000004451333715643700163530ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifndef __COMMON_H__ #define __COMMON_H__ char *Sleef_getCpuIdString(); #endif sleef-3.3.1/src/common/f128util.h000066400000000000000000000045171333715643700164450ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include static __float128 mpfr_get_f128(mpfr_t m, mpfr_rnd_t rnd) { if (isnan(mpfr_get_d(m, GMP_RNDN))) return __builtin_nan(""); mpfr_t frr, frd; mpfr_inits(frr, frd, NULL); mpfr_exp_t e; mpfr_frexp(&e, frr, m, GMP_RNDN); double d0 = mpfr_get_d(frr, GMP_RNDN); mpfr_set_d(frd, d0, GMP_RNDN); mpfr_sub(frr, frr, frd, GMP_RNDN); double d1 = mpfr_get_d(frr, GMP_RNDN); mpfr_set_d(frd, d1, GMP_RNDN); mpfr_sub(frr, frr, frd, GMP_RNDN); double d2 = mpfr_get_d(frr, GMP_RNDN); mpfr_clears(frr, frd, NULL); return ldexpq((__float128)d2 + (__float128)d1 + (__float128)d0, e); } static void mpfr_set_f128(mpfr_t frx, __float128 f, mpfr_rnd_t rnd) { char s[128]; quadmath_snprintf(s, 120, "%.50Qg", f); mpfr_set_str(frx, s, 10, rnd); } static void printf128(__float128 f) { char s[128]; quadmath_snprintf(s, 120, "%.50Qg", f); printf("%s", s); } static char frstr[16][1000]; static int frstrcnt = 0; static char *toBC(double d) { union { double d; uint64_t u64; int64_t i64; } cnv; cnv.d = d; int64_t l = cnv.i64; int e = (int)((l >> 52) & ~(-1L << 11)); int s = 
(int)(l >> 63); l = d == 0 ? 0 : ((l & ~((-1L) << 52)) | (1L << 52)); char *ptr = frstr[(frstrcnt++) & 15]; sprintf(ptr, "%s%lld*2^%d", s != 0 ? "-" : "", (long long int)l, (e-0x3ff-52)); return ptr; } static char *toBCq(__float128 d) { union { __float128 d; __uint128_t u128; } cnv; cnv.d = d; __uint128_t m = cnv.u128; int e = (int)((m >> 112) & ~(-1L << 15)); int s = (int)(m >> 127); m = d == 0 ? 0 : ((m & ((((__uint128_t)1) << 112)-1)) | ((__uint128_t)1 << 112)); uint64_t h = m / 10000000000000000000ULL; uint64_t l = m % 10000000000000000000ULL; char *ptr = frstr[(frstrcnt++) & 15]; sprintf(ptr, "%s%" PRIu64 "%019" PRIu64 "*2^%d", s != 0 ? "-" : "", h, l, (e-0x3fff-112)); return ptr; } static int xisnanq(Sleef_quad x) { return x != x; } static int xisinfq(Sleef_quad x) { return x == (Sleef_quad)__builtin_inf() || x == -(Sleef_quad)__builtin_inf(); } static int xisfiniteq(Sleef_quad x) { return !xisnanq(x) && !isinfq(x); } sleef-3.3.1/src/common/misc.h000066400000000000000000000160511333715643700160160ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // #ifndef __MISC_H__ #define __MISC_H__ #ifndef M_PI #define M_PI 3.141592653589793238462643383279502884 #endif #ifndef M_PIl #define M_PIl 3.141592653589793238462643383279502884L #endif #ifndef M_1_PI #define M_1_PI 0.318309886183790671537767526745028724 #endif #ifndef M_1_PIl #define M_1_PIl 0.318309886183790671537767526745028724L #endif #ifndef M_2_PI #define M_2_PI 0.636619772367581343075535053490057448 #endif #ifndef M_2_PIl #define M_2_PIl 0.636619772367581343075535053490057448L #endif #ifndef SLEEF_FP_ILOGB0 #define SLEEF_FP_ILOGB0 ((int)-2147483648) #endif #ifndef SLEEF_FP_ILOGBNAN #define SLEEF_FP_ILOGBNAN ((int)2147483647) #endif #define SLEEF_SNAN (((union { long long int i; double d; }) { .i = 0x7ff0000000000001LL }).d) #define SLEEF_SNANf (((union { long int i; float f; }) { .i = 0xff800001 }).f) // /* PI_A to PI_D are constants that satisfy the following two conditions. * For PI_A, PI_B and PI_C, the last 28 bits are zero. * PI_A + PI_B + PI_C + PI_D is close to PI as much as possible. The argument of a trig function is multiplied by 1/PI, and the integral part is divided into two parts, each has at most 28 bits. So, the maximum argument that could be correctly reduced should be 2^(28*2-1) PI = 1.1e+17. However, due to internal double precision calculation, the actual maximum argument that can be correctly reduced is around 2^47. */ #define PI_A 3.1415926218032836914 #define PI_B 3.1786509424591713469e-08 #define PI_C 1.2246467864107188502e-16 #define PI_D 1.2736634327021899816e-24 #define TRIGRANGEMAX 1e+14 /* PI_A2 and PI_B2 are constants that satisfy the following two conditions. * The last 3 bits of PI_A2 are zero. * PI_A2 + PI_B2 is close to PI as much as possible. The argument of a trig function is multiplied by 1/PI, and the integral part is multiplied by PI_A2. So, the maximum argument that could be correctly reduced should be 2^(3-1) PI = 12.6. 
By testing, we confirmed that it correctly reduces the argument up to around 15. */ #define PI_A2 3.141592653589793116 #define PI_B2 1.2246467991473532072e-16 #define TRIGRANGEMAX2 15 #define M_2_PI_H 0.63661977236758138243 #define M_2_PI_L -3.9357353350364971764e-17 #define SQRT_DBL_MAX 1.3407807929942596355e+154 #define TRIGRANGEMAX3 1e+9 #define M_4_PI 1.273239544735162542821171882678754627704620361328125 #define L2U .69314718055966295651160180568695068359375 #define L2L .28235290563031577122588448175013436025525412068e-12 #define R_LN2 1.442695040888963407359924681001892137426645954152985934135449406931 #define L10U 0.30102999566383914498 // log 2 / log 10 #define L10L 1.4205023227266099418e-13 #define LOG10_2 3.3219280948873623478703194294893901758648313930 #define L10Uf 0.3010253906f #define L10Lf 4.605038981e-06f // #define PI_Af 3.140625f #define PI_Bf 0.0009670257568359375f #define PI_Cf 6.2771141529083251953e-07f #define PI_Df 1.2154201256553420762e-10f #define TRIGRANGEMAXf 39000 #define PI_A2f 3.1414794921875f #define PI_B2f 0.00011315941810607910156f #define PI_C2f 1.9841872589410058936e-09f #define TRIGRANGEMAX2f 125.0f #define TRIGRANGEMAX4f 8e+6f #define SQRT_FLT_MAX 18446743523953729536.0 #define L2Uf 0.693145751953125f #define L2Lf 1.428606765330187045e-06f #define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f #define M_PIf ((float)M_PI) // #ifndef MIN #define MIN(x, y) ((x) < (y) ? (x) : (y)) #endif #ifndef MAX #define MAX(x, y) ((x) > (y) ? (x) : (y)) #endif #ifndef ABS #define ABS(x) ((x) < 0 ? 
-(x) : (x)) #endif typedef long double longdouble; #ifndef Sleef_double2_DEFINED #define Sleef_double2_DEFINED typedef struct { double x, y; } Sleef_double2; #endif #ifndef Sleef_float2_DEFINED #define Sleef_float2_DEFINED typedef struct { float x, y; } Sleef_float2; #endif #ifndef Sleef_longdouble2_DEFINED #define Sleef_longdouble2_DEFINED typedef struct { long double x, y; } Sleef_longdouble2; #endif #if defined(ENABLEFLOAT128) && !defined(Sleef_quad2_DEFINED) #define Sleef_quad2_DEFINED typedef __float128 Sleef_quad; typedef struct { __float128 x, y; } Sleef_quad2; #endif // #if defined (__GNUC__) || defined (__clang__) || defined(__INTEL_COMPILER) #define LIKELY(condition) __builtin_expect(!!(condition), 1) #define UNLIKELY(condition) __builtin_expect(!!(condition), 0) #define RESTRICT __restrict__ #ifndef __ARM_FEATURE_SVE #define INLINE __attribute__((always_inline)) #else // This is a workaround of a bug in armclang #define INLINE #endif #ifndef __arm__ #define ALIGNED(x) __attribute__((aligned(x))) #else #define ALIGNED(x) #endif #ifndef __INTEL_COMPILER #define CONST const #else #define CONST #endif #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) #ifndef SLEEF_STATIC_LIBS #define EXPORT __stdcall __declspec(dllexport) #define NOEXPORT #else // #ifndef SLEEF_STATIC_LIBS #define EXPORT #define NOEXPORT #endif // #ifndef SLEEF_STATIC_LIBS #else // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) #define EXPORT __attribute__((visibility("default"))) #define NOEXPORT __attribute__ ((visibility ("hidden"))) #endif // #if defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) #define SLEEF_NAN __builtin_nan("") #define SLEEF_NANf __builtin_nanf("") #define SLEEF_NANl __builtin_nanl("") #define SLEEF_INFINITY __builtin_inf() #define SLEEF_INFINITYf __builtin_inff() #define SLEEF_INFINITYl __builtin_infl() #if defined(__INTEL_COMPILER) || defined (__clang__) #define SLEEF_INFINITYq __builtin_inf() #define 
SLEEF_NANq __builtin_nan("") #else #define SLEEF_INFINITYq __builtin_infq() #define SLEEF_NANq (SLEEF_INFINITYq - SLEEF_INFINITYq) #endif #elif defined(_MSC_VER) #define INLINE __forceinline #define CONST #define RESTRICT #define ALIGNED(x) #define LIKELY(condition) (condition) #define UNLIKELY(condition) (condition) #ifndef SLEEF_STATIC_LIBS #define EXPORT __declspec(dllexport) #define NOEXPORT #else #define EXPORT #define NOEXPORT #endif #if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) #include #endif #define SLEEF_INFINITY (1e+300 * 1e+300) #define SLEEF_NAN (SLEEF_INFINITY - SLEEF_INFINITY) #define SLEEF_INFINITYf ((float)SLEEF_INFINITY) #define SLEEF_NANf ((float)SLEEF_NAN) #define SLEEF_INFINITYl ((long double)SLEEF_INFINITY) #define SLEEF_NANl ((long double)SLEEF_NAN) #if (defined(_M_AMD64) || defined(_M_X64)) #ifndef __SSE2__ #define __SSE2__ #define __SSE3__ #define __SSE4_1__ #endif #elif _M_IX86_FP == 2 #ifndef __SSE2__ #define __SSE2__ #define __SSE3__ #define __SSE4_1__ #endif #elif _M_IX86_FP == 1 #ifndef __SSE__ #define __SSE__ #endif #endif #endif // defined(_MSC_VER) #if !defined(__linux__) #define isinff(x) ((x) == SLEEF_INFINITYf || (x) == -SLEEF_INFINITYf) #define isinfl(x) ((x) == SLEEF_INFINITYl || (x) == -SLEEF_INFINITYl) #define isnanf(x) ((x) != (x)) #define isnanl(x) ((x) != (x)) #endif #endif // #ifndef __MISC_H__ sleef-3.3.1/src/dft-tester/000077500000000000000000000000001333715643700155005ustar00rootroot00000000000000sleef-3.3.1/src/dft-tester/CMakeLists.txt000066400000000000000000000266231333715643700202510ustar00rootroot00000000000000# Compiler properties set(CMAKE_C_FLAGS ORG_CMAKE_C_FLAGS) string(CONCAT CMAKE_C_FLAGS ${DFT_C_FLAGS}) set(COMMON_TARGET_PROPERTIES C_STANDARD 99 # -std=gnu99 ) # function(add_test_dft TESTNAME) if (ARMIE_COMMAND) add_test(NAME ${TESTNAME} COMMAND ${ARMIE_COMMAND} -msve-vector-bits=${SVE_VECTOR_BITS} ${ARGN}) elseif (NOT EMULATOR AND NOT SDE_COMMAND) 
add_test(NAME ${TESTNAME} COMMAND ${ARGN}) elseif(NOT EMULATOR) add_test(NAME ${TESTNAME} COMMAND ${SDE_COMMAND} "--" ${ARGN}) else() add_test(NAME ${TESTNAME} COMMAND ${EMULATOR} ${ARGN}) endif() endfunction() # Include directories include_directories(${PROJECT_SOURCE_DIR}/include) # sleefdft.h include_directories(${CMAKE_BINARY_DIR}/include) # sleef.h if (FFTW3_INCLUDE_DIR) include_directories(${FFTW3_INCLUDE_DIR}) # fftw3.h endif() # Link directories link_directories(${CMAKE_BINARY_DIR}/lib) # libsleef, libsleefdft # Link libraries set(COMMON_LINK_LIBRARIES ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) if (COMPILER_SUPPORTS_OPENMP) set(COMMON_LINK_LIBRARIES ${COMMON_LINK_LIBRARIES} ${OpenMP_C_FLAGS}) endif() if(NOT MSVC) # Target executable naivetestdp set(TARGET_NAIVETESTDP "naivetestdp") add_executable(${TARGET_NAIVETESTDP} naivetest.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_NAIVETESTDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_NAIVETESTDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_NAIVETESTDP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_NAIVETESTDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable naivetestsp set(TARGET_NAIVETESTSP "naivetestsp") add_executable(${TARGET_NAIVETESTSP} naivetest.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_NAIVETESTSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_NAIVETESTSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_NAIVETESTSP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_NAIVETESTSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Test naivetestdp add_test_dft(${TARGET_NAIVETESTDP}_1 $ 1) add_test_dft(${TARGET_NAIVETESTDP}_2 $ 2) add_test_dft(${TARGET_NAIVETESTDP}_3 $ 3) add_test_dft(${TARGET_NAIVETESTDP}_4 $ 4) add_test_dft(${TARGET_NAIVETESTDP}_5 $ 5) 
add_test_dft(${TARGET_NAIVETESTDP}_10 $ 10) # Test naivetestsp add_test_dft(${TARGET_NAIVETESTSP}_1 $ 1) add_test_dft(${TARGET_NAIVETESTSP}_2 $ 2) add_test_dft(${TARGET_NAIVETESTSP}_3 $ 3) add_test_dft(${TARGET_NAIVETESTSP}_4 $ 4) add_test_dft(${TARGET_NAIVETESTSP}_5 $ 5) add_test_dft(${TARGET_NAIVETESTSP}_10 $ 10) endif() # Target executable roundtriptest1ddp set(TARGET_ROUNDTRIPTEST1DDP "roundtriptest1ddp") add_executable(${TARGET_ROUNDTRIPTEST1DDP} roundtriptest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_ROUNDTRIPTEST1DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_ROUNDTRIPTEST1DDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_ROUNDTRIPTEST1DDP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_ROUNDTRIPTEST1DDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable roundtriptest1dsp set(TARGET_ROUNDTRIPTEST1DSP "roundtriptest1dsp") add_executable(${TARGET_ROUNDTRIPTEST1DSP} roundtriptest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_ROUNDTRIPTEST1DSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_ROUNDTRIPTEST1DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_ROUNDTRIPTEST1DSP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_ROUNDTRIPTEST1DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable roundtriptest2ddp set(TARGET_ROUNDTRIPTEST2DDP "roundtriptest2ddp") add_executable(${TARGET_ROUNDTRIPTEST2DDP} roundtriptest2d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_ROUNDTRIPTEST2DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_ROUNDTRIPTEST2DDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_ROUNDTRIPTEST2DDP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_ROUNDTRIPTEST2DDP} PROPERTIES 
${COMMON_TARGET_PROPERTIES}) # Target executable roundtriptest2dsp set(TARGET_ROUNDTRIPTEST2DSP "roundtriptest2dsp") add_executable(${TARGET_ROUNDTRIPTEST2DSP} roundtriptest2d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_ROUNDTRIPTEST2DSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_ROUNDTRIPTEST2DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_ROUNDTRIPTEST2DSP} ${COMMON_LINK_LIBRARIES}) set_target_properties(${TARGET_ROUNDTRIPTEST2DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) if (LIBFFTW3) # Target executable fftwtest1ddp set(TARGET_FFTWTEST1DDP "fftwtest1ddp") add_executable(${TARGET_FFTWTEST1DDP} fftwtest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_FFTWTEST1DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_FFTWTEST1DDP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_FFTWTEST1DDP} ${COMMON_LINK_LIBRARIES} ${LIBFFTW3}) set_target_properties(${TARGET_FFTWTEST1DDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable fftwtest1dsp set(TARGET_FFTWTEST1DSP "fftwtest1dsp") add_executable(${TARGET_FFTWTEST1DSP} fftwtest1d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_FFTWTEST1DSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_FFTWTEST1DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_FFTWTEST1DSP} ${COMMON_LINK_LIBRARIES} ${LIBFFTW3}) set_target_properties(${TARGET_FFTWTEST1DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable fftwtest2ddp set(TARGET_FFTWTEST2DDP "fftwtest2ddp") add_executable(${TARGET_FFTWTEST2DDP} fftwtest2d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_FFTWTEST2DDP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_FFTWTEST2DDP} PRIVATE 
${COMMON_TARGET_DEFINITIONS} BASETYPEID=1) target_link_libraries(${TARGET_FFTWTEST2DDP} ${COMMON_LINK_LIBRARIES} ${LIBFFTW3}) set_target_properties(${TARGET_FFTWTEST2DDP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Target executable fftwtest2dsp set(TARGET_FFTWTEST2DSP "fftwtest2dsp") add_executable(${TARGET_FFTWTEST2DSP} fftwtest2d.c ${PROJECT_SOURCE_DIR}/include/sleefdft.h) add_dependencies(${TARGET_FFTWTEST2DSP} ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ${TARGET_LIBDFT}) target_compile_definitions(${TARGET_FFTWTEST2DSP} PRIVATE ${COMMON_TARGET_DEFINITIONS} BASETYPEID=2) target_link_libraries(${TARGET_FFTWTEST2DSP} ${COMMON_LINK_LIBRARIES} ${LIBFFTW3}) set_target_properties(${TARGET_FFTWTEST2DSP} PROPERTIES ${COMMON_TARGET_PROPERTIES}) # Test fftwtest1ddp add_test_dft(${TARGET_FFTWTEST1DDP}_12 $ 12) add_test_dft(${TARGET_FFTWTEST1DDP}_16 $ 16) # Test fftwtest1dsp add_test_dft(${TARGET_FFTWTEST1DSP}_12 $ 12) add_test_dft(${TARGET_FFTWTEST1DSP}_16 $ 16) # Test fftwtest2ddp add_test_dft(${TARGET_FFTWTEST2DDP}_2_2 $ 2 2) add_test_dft(${TARGET_FFTWTEST2DDP}_4_4 $ 4 4) add_test_dft(${TARGET_FFTWTEST2DDP}_8_8 $ 8 8) add_test_dft(${TARGET_FFTWTEST2DDP}_10_10 $ 10 10) add_test_dft(${TARGET_FFTWTEST2DDP}_5_15 $ 5 15) # Test fftwtest2dsp add_test_dft(${TARGET_FFTWTEST2DSP}_2_2 $ 2 2) add_test_dft(${TARGET_FFTWTEST2DSP}_4_4 $ 4 4) add_test_dft(${TARGET_FFTWTEST2DSP}_8_8 $ 8 8) add_test_dft(${TARGET_FFTWTEST2DSP}_10_10 $ 10 10) add_test_dft(${TARGET_FFTWTEST2DSP}_5_15 $ 5 15) else(LIBFFTW3) if(MSVC) # Test roundtriptestdp add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_1 $ 1 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_2 $ 2 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_3 $ 3 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_4 $ 4 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_5 $ 5 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_10 $ 10 10) # Test roundtriptestsp add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_1 $ 1 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_2 $ 2 10) 
add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_3 $ 3 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_4 $ 4 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_5 $ 5 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_10 $ 10 10) endif() add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_12 $ 12 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DDP}_16 $ 16 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_12 $ 12 10) add_test_dft(${TARGET_ROUNDTRIPTEST1DSP}_16 $ 16 10) # Test roundtriptest2ddp add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_2_2 $ 2 2 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_4_4 $ 4 4 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_8_8 $ 8 8 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_10_10 $ 10 10 2) add_test_dft(${TARGET_ROUNDTRIPTEST2DDP}_5_15 $ 5 15 2) # Test roundtriptest2dsp add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_2_2 $ 2 2 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_4_4 $ 4 4 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_8_8 $ 8 8 10) add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_10_10 $ 10 10 2) add_test_dft(${TARGET_ROUNDTRIPTEST2DSP}_5_15 $ 5 15 2) endif(LIBFFTW3) sleef-3.3.1/src/dft-tester/bench1d.c000066400000000000000000000055261333715643700171600ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define _DEFAULT_SOURCE #define _XOPEN_SOURCE 700 #include #include #include #include #include #include #include #include #include #ifdef USEFFTW #include #include #else #include "sleef.h" #include "sleefdft.h" #endif typedef double real; static uint64_t gettime() { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (uint64_t)tp.tv_sec * 1000000000 + ((uint64_t)tp.tv_nsec); } #define REPEAT 8 int main(int argc, char **argv) { if (argc == 1) { fprintf(stderr, "%s \n", argv[0]); exit(-1); } int backward = 0; int log2n = atoi(argv[1]); if (log2n < 0) { backward = 1; log2n = -log2n; } const int n = 1 << log2n; const int64_t niter = (int)(100000000000.0 / n / log2n); printf("Number of iterations = %lld\n", (long long int)niter); #ifdef USEFFTW fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); #if 0 int fftw_init_threads(void); fftw_plan_with_nthreads(omp_get_max_threads()); #endif fftw_plan w = fftw_plan_dft_1d(n, in, out, backward ? FFTW_BACKWARD : FFTW_FORWARD, FFTW_MEASURE); //fftw_plan w = fftw_plan_dft_1d(n, in, out, backward ? 
FFTW_BACKWARD : FFTW_FORWARD, FFTW_PATIENT); for(int i=0;i= 3) mode = SLEEF_MODE_VERBOSE | SLEEF_MODE_ESTIMATE; if (backward) mode |= SLEEF_MODE_BACKWARD; struct SleefDFT *p = SleefDFT_double_init1d(n, in, out, mode); if (argc >= 3) SleefDFT_setPath(p, argv[2]); for(int i=0;i #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include #include "sleef.h" #include "sleefdft.h" #include #ifndef MODE #define MODE SLEEF_MODE_DEBUG #endif #if BASETYPEID == 1 #define THRES 1e-30 #define SleefDFT_init1d SleefDFT_double_init1d #define SleefDFT_execute SleefDFT_double_execute typedef double real; #elif BASETYPEID == 2 #define THRES 1e-13 #define SleefDFT_init1d SleefDFT_float_init1d #define SleefDFT_execute SleefDFT_float_execute typedef float real; #else #error BASETYPEID not set #endif static double squ(double x) { return x * x; } // complex forward double check_cf(int n) { fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n); fftw_plan w = fftw_plan_dft_1d(n, in, out, FFTW_FORWARD, FFTW_ESTIMATE); real *sx = (real *)Sleef_malloc(n*2*sizeof(real)); real *sy = (real *)Sleef_malloc(n*2*sizeof(real)); struct SleefDFT *p = SleefDFT_init1d(n, sx, sy, MODE); for(int i=0;i\n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; double e; e = check_cf(n); success = success && e < THRES; printf("complex forward : %s (%g)\n", e < THRES ? "OK" : "NG", e); e = check_cb(n); success = success && e < THRES; printf("complex backward : %s (%g)\n", e < THRES ? "OK" : "NG", e); e = check_rf(n); success = success && e < THRES; printf("real forward : %s (%g)\n", e < THRES ? 
"OK" : "NG", e); e = check_rb(n); success = success && e < THRES; printf("real backward : %s (%g)\n", e < THRES ? "OK" : "NG", e); exit(success ? 0 : -1); } sleef-3.3.1/src/dft-tester/fftwtest2d.c000066400000000000000000000075041333715643700177460ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include #include "sleef.h" #include "sleefdft.h" #include #ifndef MODE #define MODE SLEEF_MODE_DEBUG #endif #if BASETYPEID == 1 #define THRES 1e-30 #define SleefDFT_init2d SleefDFT_double_init2d #define SleefDFT_execute SleefDFT_double_execute typedef double real; #elif BASETYPEID == 2 #define THRES 1e-13 #define SleefDFT_init2d SleefDFT_float_init2d #define SleefDFT_execute SleefDFT_float_execute typedef float real; #else #error BASETYPEID not set #endif static double squ(double x) { return x * x; } // complex forward double check_cf(int n, int m) { fftw_complex *in = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n * m); fftw_complex *out = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * n * m); fftw_plan w = fftw_plan_dft_2d(n, m, in, out, FFTW_FORWARD, FFTW_ESTIMATE); real *sx = (real *)Sleef_malloc(n*m*2*sizeof(real)); real *sy = (real *)Sleef_malloc(n*m*2*sizeof(real)); struct SleefDFT *p = SleefDFT_init2d(n, m, sx, sy, MODE); for(int i=0;i \n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); const int m = 1 << atoi(argv[2]); srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; double e; e = check_cf(n, m); success = success && e < THRES; printf("complex forward : %s (%g)\n", e < THRES ? 
"OK" : "NG", e); e = check_cb(n, m); success = success && e < THRES; printf("complex backward : %s (%g)\n", e < THRES ? "OK" : "NG", e); exit(success ? 0 : -1); } sleef-3.3.1/src/dft-tester/measuredft.c000066400000000000000000000100321333715643700177770ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define _DEFAULT_SOURCE #define _XOPEN_SOURCE 700 #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include #include "sleef.h" #include "sleefdft.h" static uint64_t gettime() { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (uint64_t)tp.tv_sec * 1000000000 + ((uint64_t)tp.tv_nsec); } int mode[] = { SLEEF_MODE_MEASURE | SLEEF_MODE_NO_MT, SLEEF_MODE_MEASURE}; #define ENABLE_SP //#define ROUNDTRIP #define REPEAT 2 //#define ENABLE_SLEEP //#define WARMUP int main(int argc, char **argv) { int start = 1, end = 18; if (argc > 1) start = atoi(argv[1]); if (argc > 2) end = atoi(argv[2]); double *din = (double *)Sleef_malloc((1 << 18)*2 * sizeof(double)); double *dout = (double *)Sleef_malloc((1 << 18)*2 * sizeof(double)); float *sin = (float *)Sleef_malloc((1 << 18)*2 * sizeof(float)); float *sout = (float *)Sleef_malloc((1 << 18)*2 * sizeof(float)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); for(int log2n=start;log2n<=end;log2n++) { const int n = 1 << log2n; int64_t niter = (int64_t)(1000000000.0 / REPEAT / n / log2n); printf("%d ", n); for(int m=0;m<2;m++) { #ifdef ENABLE_SLEEP sleep(1); #endif struct SleefDFT *pf = SleefDFT_double_init1d(n, NULL, NULL, mode[m]); #ifdef ROUNDTRIP struct SleefDFT *pb = SleefDFT_double_init1d(n, NULL, NULL, 
mode[m] | SLEEF_MODE_BACKWARD); #endif for(int i=0;i #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include #include "sleef.h" #include "sleefdft.h" #include "misc.h" #ifndef MODE #define MODE SLEEF_MODE_DEBUG #endif #define THRES 1e-4 #if BASETYPEID == 1 #define SleefDFT_init SleefDFT_double_init1d #define SleefDFT_execute SleefDFT_double_execute typedef double real; typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PIl * _Complex_I / n) * kn); } #elif BASETYPEID == 2 #define SleefDFT_init SleefDFT_float_init1d #define SleefDFT_execute SleefDFT_float_execute typedef float real; typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PIl * _Complex_I / n) * kn); } #elif BASETYPEID == 3 #define SleefDFT_init SleefDFT_longdouble_init1d #define SleefDFT_execute SleefDFT_longdouble_execute typedef double real; typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PIl * _Complex_I / n) * kn); } #elif BASETYPEID == 4 #include #define SleefDFT_init SleefDFT_quad_init1d #define SleefDFT_execute SleefDFT_quad_execute typedef Sleef_quad real; typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PIl * _Complex_I / n) * kn); } #else #error No BASETYPEID specified #endif void forward(cmpl *ts, cmpl *fs, int len) { int k, n; for(k=0;k THRES) || (fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) { success = 0; } double t; t = (sy[(i*2+0)] - creal(fs[i])); rmsn += t*t; t = (sy[(i*2+1)] - cimag(fs[i])); rmsn += t*t; rmsd += creal(fs[i]) * creal(fs[i]) + cimag(fs[i]) * cimag(fs[i]); } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } // complex backward int check_cb(int n) { int i; real *sx = (real *)Sleef_malloc(sizeof(real)*n*2); real 
*sy = (real *)Sleef_malloc(sizeof(real)*n*2); cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) || (fabs(sy[(i*2+1)] - cimag(ts[i])) > THRES)) { success = 0; } } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } // real forward int check_rf(int n) { int i; real *sx = (real *)Sleef_malloc(n * sizeof(real)); real *sy = (real *)Sleef_malloc((n/2+1)*sizeof(real)*2); cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) success = 0; if (fabs(sy[(2*i+1)] - cimag(fs[i])) > THRES) success = 0; } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } // real backward int check_rb(int n) { int i; cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) { success = 0; } if ((fabs(sy[i] - creal(ts[i])) > THRES)) { success = 0; } } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } int check_arf(int n) { int i; real *sx = (real *)Sleef_malloc(n * sizeof(real)); real *sy = (real *)Sleef_malloc(n * sizeof(real)); cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) success = 0; if (fabs(sy[(2*0+1)] - creal(fs[n/2])) > THRES) success = 0; } else { if (fabs(sy[(2*i+0)] - creal(fs[i])) > THRES) success = 0; if (fabs(sy[(2*i+1)] - cimag(fs[i])) > THRES) success = 0; } } // Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return success; } int check_arb(int n) { int i; real *sx = (real *)Sleef_malloc(n * sizeof(real)); real *sy = (real *)Sleef_malloc(n * sizeof(real)); cmpl *ts = (cmpl *)malloc(sizeof(cmpl)*n); cmpl *fs = (cmpl *)malloc(sizeof(cmpl)*n); // for(i=0;i THRES) { success = 0; } if ((fabs(sy[i]*2 - creal(ts[i])) > THRES)) { success = 0; } } // free(fs); free(ts); Sleef_free(sx); Sleef_free(sy); SleefDFT_dispose(p); // return 
success; } int main(int argc, char **argv) { if (argc != 2) { fprintf(stderr, "%s \n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; printf("complex forward : %s\n", (success &= check_cf(n)) ? "OK" : "NG"); printf("complex backward : %s\n", (success &= check_cb(n)) ? "OK" : "NG"); printf("real forward : %s\n", (success &= check_rf(n)) ? "OK" : "NG"); printf("real backward : %s\n", (success &= check_rb(n)) ? "OK" : "NG"); printf("real alt forward : %s\n", (success &= check_arf(n)) ? "OK" : "NG"); printf("real alt backward : %s\n", (success &= check_arb(n)) ? "OK" : "NG"); exit(!success); } sleef-3.3.1/src/dft-tester/roundtriptest1d.c000066400000000000000000000073571333715643700210330ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include #include "sleef.h" #include "sleefdft.h" #ifndef MODE #define MODE (SLEEF_MODE_DEBUG | SLEEF_MODE_VERBOSE) #endif #if BASETYPEID == 1 #define THRES 1e-30 #define SleefDFT_init SleefDFT_double_init1d #define SleefDFT_execute SleefDFT_double_execute typedef double real; #elif BASETYPEID == 2 #define THRES 1e-13 #define SleefDFT_init SleefDFT_float_init1d #define SleefDFT_execute SleefDFT_float_execute typedef float real; #else #error BASETYPEID not set #endif static double squ(double x) { return x * x; } // complex transforms double check_c(int n) { struct SleefDFT *p; real *sx = (real *)Sleef_malloc(n*2 * sizeof(real)); real *sy = (real *)Sleef_malloc(n*2 * sizeof(real)); real *sz = (real 
*)Sleef_malloc(n*2 * sizeof(real)); for(int i=0;i []\n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); const int nloop = argc >= 3 ? atoi(argv[2]) : 1; srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; double e; for(int i=0;(nloop < 0 || i < nloop) && success;i++) { e = check_c(n); success = success && e < THRES; printf("complex : %s (%g)\n", e < THRES ? "OK" : "NG", e); e = check_r(n); success = success && e < THRES; printf("real : %s (%g)\n", e < THRES ? "OK" : "NG", e); } exit(!success); } sleef-3.3.1/src/dft-tester/roundtriptest2d.c000066400000000000000000000053321333715643700210230ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include #include "sleef.h" #include "sleefdft.h" #ifndef MODE #define MODE (SLEEF_MODE_DEBUG | SLEEF_MODE_VERBOSE) #endif #if BASETYPEID == 1 #define THRES 1e-30 #define SleefDFT_init2d SleefDFT_double_init2d #define SleefDFT_execute SleefDFT_double_execute typedef double real; #elif BASETYPEID == 2 #define THRES 1e-13 #define SleefDFT_init2d SleefDFT_float_init2d #define SleefDFT_execute SleefDFT_float_execute typedef float real; #else #error BASETYPEID not set #endif static double squ(double x) { return x * x; } // complex transforms double check_c(int n, int m) { struct SleefDFT *p; real *sx = (real *)Sleef_malloc(n*m*2 * sizeof(real)); real *sy = (real *)Sleef_malloc(n*m*2 * sizeof(real)); real *sz = (real *)Sleef_malloc(n*m*2 * sizeof(real)); for(int i=0;i []\n", argv[0]); exit(-1); } const int n = 1 << atoi(argv[1]); const int m = 1 << 
atoi(argv[2]); const int nloop = argc >= 4 ? atoi(argv[3]) : 1; srand(time(NULL)); SleefDFT_setPlanFilePath(NULL, NULL, SLEEF_PLAN_RESET | SLEEF_PLAN_READONLY); // int success = 1; double e; for(int i=0;(nloop < 0 || i < nloop) && success;i++) { e = check_c(n, m); success = success && e < THRES; printf("complex : %s (%g)\n", e < THRES ? "OK" : "NG", e); } exit(!success); } sleef-3.3.1/src/dft-tester/tutorial.c000066400000000000000000000034561333715643700175170ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // gcc tutorial.c -lsleef -lsleefdft -lm #include #include #include #include #include #include "sleef.h" #include "sleefdft.h" #define THRES 1e-4 typedef double complex cmpl; cmpl omega(double n, double kn) { return cexp((-2 * M_PI * _Complex_I / n) * kn); } void forward(cmpl *ts, cmpl *fs, int len) { for(int k=0;k THRES) || (fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) { success = 0; } } printf("%s\n", success ? "OK" : "NG"); free(fs); free(ts); Sleef_free(sy); Sleef_free(sx); SleefDFT_dispose(p); exit(success); } sleef-3.3.1/src/dft/000077500000000000000000000000001333715643700141745ustar00rootroot00000000000000sleef-3.3.1/src/dft/CMakeLists.txt000066400000000000000000000331371333715643700167430ustar00rootroot00000000000000# Option MAXBUTWIDTH if (COMPILER_SUPPORTS_SVE) set(SLEEFDFT_MAXBUTWIDTH 6 CACHE STRING "Log_2 (Maximum butterfly length) of butterflies") else() set(SLEEFDFT_MAXBUTWIDTH 4 CACHE STRING "Log_2 (Maximum butterfly length) of butterflies") endif() if (SLEEFDFT_MAXBUTWIDTH GREATER 7) message(FATAL_ERROR "SLEEFDFT_MAXBUTWIDTH has to be smaller than 8." ) endif() # Option option(SLEEFDFT_ENABLE_STREAM "Streaming instructions are utilized in DFT." OFF) option(SLEEFDFT_ENABLE_LONGDOUBLE "Long double routines will be compiled in." 
OFF)
option(SLEEFDFT_ENABLE_QUAD "Quad precision routines will be compiled in." OFF)

# Compiler properties

# CMAKE_C_FLAGS for this directory is rebuilt from DFT_C_FLAGS: the elements of
# DFT_C_FLAGS are concatenated (no separator is inserted) and the result
# replaces the previous value of CMAKE_C_FLAGS.
# NOTE(review): this used to be preceded by `set(CMAKE_C_FLAGS ORG_CMAKE_C_FLAGS)`,
# which stored the literal text "ORG_CMAKE_C_FLAGS" (no ${}) and was overwritten
# by the command below, so it never had any effect and has been dropped.
# Presumably the intent was to start from a saved copy of the original flags;
# confirm where ORG_CMAKE_C_FLAGS is defined before prepending it here.
string(CONCAT CMAKE_C_FLAGS ${DFT_C_FLAGS})

# Target properties shared by the targets created in this file.
set(COMMON_TARGET_PROPERTIES
  C_STANDARD 99                   # -std=gnu99
)
if(BUILD_SHARED_LIBS)
  list(APPEND COMMON_TARGET_PROPERTIES POSITION_INDEPENDENT_CODE ON)  # -fPIC
endif()

# Preprocessor definitions shared by the targets created in this file.
# They are appended to whatever COMMON_TARGET_DEFINITIONS already contains.
list(APPEND COMMON_TARGET_DEFINITIONS MAXBUTWIDTH=${SLEEFDFT_MAXBUTWIDTH})

# ENABLE_STREAM is always defined, as either 0 or 1.
if(SLEEFDFT_ENABLE_STREAM)
  list(APPEND COMMON_TARGET_DEFINITIONS ENABLE_STREAM=1)
else()
  list(APPEND COMMON_TARGET_DEFINITIONS ENABLE_STREAM=0)
endif()

if(COMPILER_SUPPORTS_FLOAT128)
  list(APPEND COMMON_TARGET_DEFINITIONS ENABLEFLOAT128)
endif()

if(COMPILER_SUPPORTS_LONG_DOUBLE)
  list(APPEND COMMON_TARGET_DEFINITIONS ENABLE_LONGDOUBLE)
endif()

if(COMPILER_SUPPORTS_OPENMP)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
endif()

# Include directories

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_BINARY_DIR}/include)
include_directories(${CMAKE_CURRENT_BINARY_DIR})

# Constants definition

# Three parallel lists; the entries of LIST_SUPPORTED_FPTYPE are used as
# indices into them with list(GET ...).
set(LISTSHORTTYPENAME "dp" "sp" "ld" "qp")
set(LISTLONGTYPENAME "double" "float" "longdouble" "Sleef_quad")
set(LISTTYPEID "1" "2" "3" "4")

# MACRODEF_<isa><type> : compile definitions for one ISA / base type pair.
# CFLAGS_<isa><type>   : compile options for the same pair.

# Compiler vector extension
set(MACRODEF_vecextdp BASETYPEID=1 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextdp ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextsp BASETYPEID=2 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextsp ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextld BASETYPEID=3 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextld ${FLAGS_ENABLE_VECEXT})
set(MACRODEF_vecextqp BASETYPEID=4 ENABLE_VECEXT CONFIG=1)
set(CFLAGS_vecextqp ${FLAGS_ENABLE_VECEXT})

# Pure C
set(MACRODEF_purecdp BASETYPEID=1 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecdp ${FLAGS_ENABLE_PUREC})
set(MACRODEF_purecsp BASETYPEID=2 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecsp ${FLAGS_ENABLE_PUREC})
# Per-ISA build settings.
#
# For every ISA / base type pair <e> (for example "avx2dp"):
#   MACRODEF_<e>  compile definitions
#   CFLAGS_<e>    compile options
# Both are looked up by name, as ${MACRODEF_${E}} and ${CFLAGS_${E}}, when the
# unroll_* object libraries are created further down in this file.
# BASETYPEID follows LISTTYPEID: 1 = dp, 2 = sp, 3 = ld, 4 = qp.

# Pure C, long double and quad precision
set(MACRODEF_purecld      BASETYPEID=3 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecld        ${FLAGS_ENABLE_PUREC})
set(MACRODEF_purecqp      BASETYPEID=4 ENABLE_PUREC CONFIG=1)
set(CFLAGS_purecqp        ${FLAGS_ENABLE_PUREC})

# x86: SSE2 (note: built with the SSE4 flag set and CONFIG=4)
set(MACRODEF_sse2dp       BASETYPEID=1 ENABLE_SSE2 CONFIG=4)
set(CFLAGS_sse2dp         ${FLAGS_ENABLE_SSE4})
set(MACRODEF_sse2sp       BASETYPEID=2 ENABLE_SSE2 CONFIG=4)
set(CFLAGS_sse2sp         ${FLAGS_ENABLE_SSE4})

# x86: AVX
set(MACRODEF_avxdp        BASETYPEID=1 ENABLE_AVX CONFIG=1)
set(CFLAGS_avxdp          ${FLAGS_ENABLE_AVX})
set(MACRODEF_avxsp        BASETYPEID=2 ENABLE_AVX CONFIG=1)
set(CFLAGS_avxsp          ${FLAGS_ENABLE_AVX})

# x86: AVX2
set(MACRODEF_avx2dp       BASETYPEID=1 ENABLE_AVX2 CONFIG=1)
set(CFLAGS_avx2dp         ${FLAGS_ENABLE_AVX2})
set(MACRODEF_avx2sp       BASETYPEID=2 ENABLE_AVX2 CONFIG=1)
set(CFLAGS_avx2sp         ${FLAGS_ENABLE_AVX2})

# x86: AVX-512F
set(MACRODEF_avx512fdp    BASETYPEID=1 ENABLE_AVX512F CONFIG=1)
set(CFLAGS_avx512fdp      ${FLAGS_ENABLE_AVX512F})
set(MACRODEF_avx512fsp    BASETYPEID=2 ENABLE_AVX512F CONFIG=1)
set(CFLAGS_avx512fsp      ${FLAGS_ENABLE_AVX512F})

# AArch64: Advanced SIMD
set(MACRODEF_advsimddp    BASETYPEID=1 ENABLE_ADVSIMD CONFIG=1)
set(CFLAGS_advsimddp      ${FLAGS_ENABLE_ADVSIMD})
set(MACRODEF_advsimdsp    BASETYPEID=2 ENABLE_ADVSIMD CONFIG=1)
set(CFLAGS_advsimdsp      ${FLAGS_ENABLE_ADVSIMD})

# ARM: NEON32 (single precision only)
set(MACRODEF_neon32sp     BASETYPEID=2 ENABLE_NEON32 CONFIG=1)
set(CFLAGS_neon32sp       ${FLAGS_ENABLE_NEON32})

# AArch64: SVE. The sve256 .. sve2048 entries share ENABLE_SVE and the same
# flags; they differ only in CONFIG (8, 9, 10, 11).
set(MACRODEF_sve256dp     BASETYPEID=1 ENABLE_SVE CONFIG=8)
set(CFLAGS_sve256dp       ${FLAGS_ENABLE_SVE})
set(MACRODEF_sve256sp     BASETYPEID=2 ENABLE_SVE CONFIG=8)
set(CFLAGS_sve256sp       ${FLAGS_ENABLE_SVE})

set(MACRODEF_sve512dp     BASETYPEID=1 ENABLE_SVE CONFIG=9)
set(CFLAGS_sve512dp       ${FLAGS_ENABLE_SVE})
set(MACRODEF_sve512sp     BASETYPEID=2 ENABLE_SVE CONFIG=9)
set(CFLAGS_sve512sp       ${FLAGS_ENABLE_SVE})

set(MACRODEF_sve1024dp    BASETYPEID=1 ENABLE_SVE CONFIG=10)
set(CFLAGS_sve1024dp      ${FLAGS_ENABLE_SVE})
set(MACRODEF_sve1024sp    BASETYPEID=2 ENABLE_SVE CONFIG=10)
set(CFLAGS_sve1024sp      ${FLAGS_ENABLE_SVE})

set(MACRODEF_sve2048dp    BASETYPEID=1 ENABLE_SVE CONFIG=11)
set(CFLAGS_sve2048dp      ${FLAGS_ENABLE_SVE})
set(MACRODEF_sve2048sp BASETYPEID=2 ENABLE_SVE CONFIG=11)
set(CFLAGS_sve2048sp ${FLAGS_ENABLE_SVE})

# POWER: VSX
set(MACRODEF_vsxdp BASETYPEID=1 ENABLE_VSX CONFIG=1)
set(CFLAGS_vsxdp ${FLAGS_ENABLE_VSX})
set(MACRODEF_vsxsp BASETYPEID=2 ENABLE_VSX CONFIG=1)
set(CFLAGS_vsxsp ${FLAGS_ENABLE_VSX})

# List all available scalar data types
#
# ISALIST_<TYPE> (TYPE = SP, DP, LD, QP) collects the ISA variants built for a
# base type. LIST_SUPPORTED_FPTYPE holds indices into LISTSHORTTYPENAME /
# LISTLONGTYPENAME / LISTTYPEID (0 = dp, 1 = sp, 2 = ld, 3 = qp).
# For every type the scalar entry is the pure C variant, replaced by the
# vector-extension variant when the compiler ID matches GNU or Clang.

set(ISALIST_SP purecsp)
set(ISALIST_DP purecdp)
set(LIST_SUPPORTED_FPTYPE 0 1)

if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
  set(ISALIST_SP vecextsp)
  set(ISALIST_DP vecextdp)
endif()

if(COMPILER_SUPPORTS_LONG_DOUBLE AND SLEEFDFT_ENABLE_LONGDOUBLE)
  list(APPEND LIST_SUPPORTED_FPTYPE 2)
  # Fixed: this used to assign ISALIST_QP, which left ISALIST_LD empty on
  # compilers other than GNU/Clang even though type index 2 was enabled.
  set(ISALIST_LD purecld)
  if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
    set(ISALIST_LD vecextld)
  endif()
endif()

if(COMPILER_SUPPORTS_FLOAT128 AND SLEEFDFT_ENABLE_QUAD)
  list(APPEND LIST_SUPPORTED_FPTYPE 3)
  set(ISALIST_QP purecqp)
  if(CMAKE_C_COMPILER_ID MATCHES "(GNU|Clang)")
    set(ISALIST_QP vecextqp)
  endif()
endif()

# List all available vector data types

if(COMPILER_SUPPORTS_SSE4)
  # The sse2* variants are enabled by SSE4 support (they use the SSE4 flags).
  list(APPEND ISALIST_SP sse2sp)
  list(APPEND ISALIST_DP sse2dp)
endif()

if(COMPILER_SUPPORTS_AVX)
  list(APPEND ISALIST_SP avxsp)
  list(APPEND ISALIST_DP avxdp)
endif()

if(COMPILER_SUPPORTS_AVX2)
  list(APPEND ISALIST_SP avx2sp)
  list(APPEND ISALIST_DP avx2dp)
endif()

if(COMPILER_SUPPORTS_AVX512F)
  list(APPEND ISALIST_SP avx512fsp)
  list(APPEND ISALIST_DP avx512fdp)
endif()

if(COMPILER_SUPPORTS_ADVSIMD)
  list(APPEND ISALIST_SP advsimdsp)
  list(APPEND ISALIST_DP advsimddp)
endif()

if(COMPILER_SUPPORTS_SVE)
  list(APPEND ISALIST_SP sve256sp sve512sp sve1024sp sve2048sp)
  list(APPEND ISALIST_DP sve256dp sve512dp sve1024dp sve2048dp)
endif()

if(COMPILER_SUPPORTS_NEON32)
  # Single precision only: no neon32dp variant is defined.
  list(APPEND ISALIST_SP neon32sp)
endif()

if(COMPILER_SUPPORTS_VSX)
  list(APPEND ISALIST_SP vsxsp)
  list(APPEND ISALIST_DP vsxdp)
endif()

# NLIST supplies the <N> part of the unroll_<N>_<isa>.c file names.
if(SLEEFDFT_ENABLE_STREAM)
  set(NLIST 0 1 2 3)
else()
  set(NLIST 0 2)
endif()

# Target mkunroll
# (add_host_executable is a project helper defined outside this file.)

set(TARGET_MKUNROLL "mkunroll")
add_host_executable(${TARGET_MKUNROLL} mkunroll.c)
set_target_properties(${TARGET_MKUNROLL} PROPERTIES ${COMMON_TARGET_PROPERTIES})
# The shared definitions are only attached for native builds.
if(NOT CMAKE_CROSSCOMPILING)
  target_compile_definitions(${TARGET_MKUNROLL} PRIVATE ${COMMON_TARGET_DEFINITIONS})
endif()

# Target mkdispatch

set(TARGET_MKDISPATCH "mkdispatch")
add_host_executable(${TARGET_MKDISPATCH} mkdispatch.c)
set_target_properties(${TARGET_MKDISPATCH} PROPERTIES ${COMMON_TARGET_PROPERTIES})
if(NOT CMAKE_CROSSCOMPILING)
  target_compile_definitions(${TARGET_MKDISPATCH} PRIVATE ${COMMON_TARGET_DEFINITIONS})
endif()

# Target dispatchparam.h
# mkdispatch is run in "paramonly" mode with the double precision ISA list and
# its standard output is redirected into the header.

add_custom_command(OUTPUT dispatchparam.h
  COMMENT "Generating dispatchparam.h"
  COMMAND $<TARGET_FILE:${TARGET_MKDISPATCH}> paramonly ${SLEEFDFT_MAXBUTWIDTH} ${ISALIST_DP} > ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h
  DEPENDS ${TARGET_MKDISPATCH}
  )
add_custom_target(dispatchparam.h_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h)

# Target dispatch*.h
# One header per supported base type, written into the current binary dir.

foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(TOUPPER ${ST} CST)                 # CST is "DP"
  list(GET LISTLONGTYPENAME ${T} LT)        # LT is "double"
  string(CONCAT S "dispatch" ${ST} ".h")    # S is dispatchdp.h
  add_custom_command(OUTPUT ${S}
    COMMENT "Generating ${S}"
    COMMAND $<TARGET_FILE:${TARGET_MKDISPATCH}> ${LT} ${SLEEFDFT_MAXBUTWIDTH} ${ISALIST_${CST}} > ${S}
    DEPENDS ${TARGET_MKDISPATCH}
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    )
  string(CONCAT G ${S} "_generated")        # G is dispatchdp.h_generated
  add_custom_target(${G} SOURCES ${S})
endforeach()

# Target dftcommon.o

add_library(dftcommon_obj OBJECT
  dftcommon.c
  dftcommon.h
  ${CMAKE_CURRENT_BINARY_DIR}/dispatchparam.h
  ${CMAKE_BINARY_DIR}/include/sleef.h
  )
add_dependencies(dftcommon_obj ${TARGET_HEADERS} dispatchparam.h_generated)
# sleef.h is marked GENERATED so it does not have to exist at configure time.
# NOTE(review): the header is referenced via CMAKE_BINARY_DIR while the include
# path above uses PROJECT_BINARY_DIR; confirm both name the same directory
# when this project is built as a subproject.
set_source_files_properties(${CMAKE_BINARY_DIR}/include/sleef.h PROPERTIES GENERATED TRUE)
set_target_properties(dftcommon_obj PROPERTIES ${COMMON_TARGET_PROPERTIES})
target_compile_definitions(dftcommon_obj PRIVATE ${COMMON_TARGET_DEFINITIONS})

# Target dft*.o
# dft.c is compiled once per supported base type, selected by BASETYPEID.

foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(CONCAT G "dft" ${ST} "_obj")       # G is "dftdp_obj"
  string(CONCAT S "dispatch" ${ST} ".h")    # S is "dispatchdp.h"
  add_library(${G} OBJECT dft.c dftcommon.h ${S})
  string(CONCAT SG ${S} "_generated")       # SG is "dispatchdp.h_generated"
  add_dependencies(${G} ${SG} ${TARGET_HEADERS})
  set_target_properties(${G} PROPERTIES ${COMMON_TARGET_PROPERTIES})
  list(GET LISTTYPEID ${T} ID)              # ID is 1
  target_compile_definitions(${G} PRIVATE BASETYPEID=${ID} ${COMMON_TARGET_DEFINITIONS})
endforeach()

# Copy unroll0.org to ${CMAKE_CURRENT_BINARY_DIR}

add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/unroll0.org
  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/unroll0.org ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/unroll0.org)
add_custom_target(unroll0.org.copied DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/unroll0.org)

# Target unroll*.c
# UNROLL_TARGET_<TYPE> accumulates one file name per (ISA, N) combination.

foreach(T ${LIST_SUPPORTED_FPTYPE})
  list(GET LISTSHORTTYPENAME ${T} ST)       # ST is "dp", for example
  string(TOUPPER ${ST} CST)                 # CST is "DP"
  list(GET LISTLONGTYPENAME ${T} LT)        # LT is "double"

  foreach(E ${ISALIST_${CST}})              # E is "sse2dp"
    foreach(N ${NLIST})
      string(CONCAT UC unroll_ ${N} _ ${E} ".c")  # UC is "unroll_0_sse2dp.c"
      list(APPEND UNROLL_TARGET_${CST} ${UC})
    endforeach()
  endforeach()
  message(STATUS "Unroll target for ${CST} : ${UNROLL_TARGET_${CST}}")

  # Nothing is generated for a type whose ISA list is empty.
  if(UNROLL_TARGET_${CST})
    add_custom_command(OUTPUT
${UNROLL_TARGET_${CST}} COMMENT "Generating ${UNROLL_TARGET_${CST}}" COMMAND $ ${LT} ${ISALIST_${CST}} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} DEPENDS ${TARGET_MKUNROLL} unroll0.org.copied ) add_custom_target(unroll_target_${ST} DEPENDS ${UNROLL_TARGET_${CST}}) endif() endforeach() # Target unroll*.o foreach(T ${LIST_SUPPORTED_FPTYPE}) list(GET LISTSHORTTYPENAME ${T} ST) # ST is "dp", for example string(TOUPPER ${ST} CST) # CST is "DP" list(GET LISTLONGTYPENAME ${T} LT) # LT is "double" foreach(E ${ISALIST_${CST}}) # E is "sse2dp" foreach(N ${NLIST}) string(CONCAT U unroll_ ${N} _ ${E}) # U is "unroll_0_sse2dp" string(CONCAT UG ${U} "_obj") # UG is "unroll_0_sse2dp_obj" string(CONCAT UC ${U} ".c") # UC is "unroll_0_sse2dp.c" add_library(${UG} OBJECT ${UC}) set_target_properties(${UG} PROPERTIES ${COMMON_TARGET_PROPERTIES}) target_include_directories(${UG} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_compile_definitions(${UG} PRIVATE ${COMMON_TARGET_DEFINITIONS} ${MACRODEF_${E}}) target_compile_options(${UG} PRIVATE ${CFLAGS_${E}}) add_dependencies(${UG} ${TARGET_HEADERS} unroll_target_${ST}) endforeach() endforeach() endforeach() # Target libdft add_library(${TARGET_LIBDFT} $ $) target_link_libraries(${TARGET_LIBDFT} ${TARGET_LIBSLEEF} ${LIBM}) foreach(T ${LIST_SUPPORTED_FPTYPE}) list(GET LISTSHORTTYPENAME ${T} ST) # ST is "dp", for example string(CONCAT G "dft" ${ST} "_obj") # G is "dftdp_obj" target_sources(${TARGET_LIBDFT} PRIVATE $) endforeach() foreach(T ${LIST_SUPPORTED_FPTYPE}) list(GET LISTSHORTTYPENAME ${T} ST) # ST is "dp", for example string(TOUPPER ${ST} CST) # CST is "DP" foreach(E ${ISALIST_${CST}}) # E is "sse2dp" foreach(N ${NLIST}) string(CONCAT UG unroll_ ${N} _ ${E} "_obj") # U is "unroll_0_sse2dp_obj" target_sources(${TARGET_LIBDFT} PRIVATE $) endforeach() endforeach() endforeach() set_target_properties(${TARGET_LIBDFT} PROPERTIES VERSION ${SLEEF_VERSION} SOVERSION ${SLEEF_SOVERSION} ${COMMON_TARGET_PROPERTIES} ) # Install install(FILES 
${PROJECT_SOURCE_DIR}/include/sleefdft.h DESTINATION include) install(TARGETS ${TARGET_LIBDFT} DESTINATION lib) sleef-3.3.1/src/dft/dft.c000066400000000000000000001410621333715643700151210ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include "sleef.h" #include "misc.h" #include "common.h" #include "arraymap.h" #include "dftcommon.h" #ifdef _OPENMP #include #endif #if BASETYPEID == 1 typedef double real; typedef Sleef_double2 sc_t; #define BASETYPESTRING "double" #define MAGIC 0x27182818 #define MAGIC2D 0x17320508 #define INIT SleefDFT_double_init1d #define EXECUTE SleefDFT_double_execute #define INIT2D SleefDFT_double_init2d #define CTBL ctbl_double #define REALSUB0 realSub0_double #define REALSUB1 realSub1_double #define GETINT getInt_double #define GETPTR getPtr_double #define DFTF dftf_double #define DFTB dftb_double #define TBUTF tbutf_double #define TBUTB tbutb_double #define BUTF butf_double #define BUTB butb_double #define SINCOSPI Sleef_sincospi_u05 #include "dispatchdp.h" #elif BASETYPEID == 2 typedef float real; typedef Sleef_float2 sc_t; #define BASETYPESTRING "float" #define MAGIC 0x31415926 #define MAGIC2D 0x22360679 #define INIT SleefDFT_float_init1d #define EXECUTE SleefDFT_float_execute #define INIT2D SleefDFT_float_init2d #define CTBL ctbl_float #define REALSUB0 realSub0_float #define REALSUB1 realSub1_float #define GETINT getInt_float #define GETPTR getPtr_float #define DFTF dftf_float #define DFTB dftb_float #define TBUTF tbutf_float #define TBUTB tbutb_float #define BUTF butf_float #define BUTB butb_float 
#define SINCOSPI Sleef_sincospif_u05 #include "dispatchsp.h" #elif BASETYPEID == 3 typedef long double real; typedef Sleef_longdouble2 sc_t; #define BASETYPESTRING "long double" #define MAGIC 0x14142135 #define MAGIC2D 0x26457513 #define INIT SleefDFT_longdouble_init1d #define EXECUTE SleefDFT_longdouble_execute #define INIT2D SleefDFT_longdouble_init2d #define CTBL ctbl_longdouble #define REALSUB0 realSub0_longdouble #define REALSUB1 realSub1_longdouble #define GETINT getInt_longdouble #define GETPTR getPtr_longdouble #define DFTF dftf_longdouble #define DFTB dftb_longdouble #define TBUTF tbutf_longdouble #define TBUTB tbutb_longdouble #define BUTF butf_longdouble #define BUTB butb_longdouble #define SINCOSPI Sleef_sincospil_u05 #include "dispatchld.h" #elif BASETYPEID == 4 typedef Sleef_quad real; typedef Sleef_quad2 sc_t; #define BASETYPESTRING "Sleef_quad" #define MAGIC 0x33166247 #define MAGIC2D 0x36055512 #define INIT SleefDFT_quad_init1d #define EXECUTE SleefDFT_quad_execute #define INIT2D SleefDFT_quad_init2d #define CTBL ctbl_Sleef_quad #define REALSUB0 realSub0_Sleef_quad #define REALSUB1 realSub1_Sleef_quad #define GETINT getInt_Sleef_quad #define GETPTR getPtr_Sleef_quad #define DFTF dftf_Sleef_quad #define DFTB dftb_Sleef_quad #define TBUTF tbutf_Sleef_quad #define TBUTB tbutb_Sleef_quad #define BUTF butf_Sleef_quad #define BUTB butb_Sleef_quad #define SINCOSPI Sleef_sincospiq_u05 #include "dispatchqp.h" #else #error No BASETYPEID specified #endif #define IMPORT_IS_EXPORT #include "sleefdft.h" // #if BASETYPEID == 4 real CTBL[] = { 0.7071067811865475243818940365159164684883Q, -0.7071067811865475243818940365159164684883Q, 0.9238795325112867561014214079495587839119Q, -0.382683432365089771723257530688933059082Q, 0.382683432365089771723257530688933059082Q, -0.9238795325112867561014214079495587839119Q, #if MAXBUTWIDTH >= 5 0.9807852804032304491190993878113602022495Q, -0.1950903220161282678433729148581576851029Q, 0.5555702330196022247573058028269343822103Q, 
-0.8314696123025452370808655033762590846891Q, 0.8314696123025452370808655033762590846891Q, -0.5555702330196022247573058028269343822103Q, 0.1950903220161282678433729148581576851029Q, -0.9807852804032304491190993878113602022495Q, #endif #if MAXBUTWIDTH >= 6 0.9951847266721968862310254699821143731242Q, -0.09801714032956060199569840382660679267701Q, 0.6343932841636454982026105398063009488396Q, -0.7730104533627369607965383602188325085081Q, 0.881921264348355029715105513066220055407Q, -0.4713967368259976485449225247492677226546Q, 0.2902846772544623676448431737195932100803Q, -0.9569403357322088649310892760624369657307Q, 0.9569403357322088649310892760624369657307Q, -0.2902846772544623676448431737195932100803Q, 0.4713967368259976485449225247492677226546Q, -0.881921264348355029715105513066220055407Q, 0.7730104533627369607965383602188325085081Q, -0.6343932841636454982026105398063009488396Q, 0.09801714032956060199569840382660679267701Q, -0.9951847266721968862310254699821143731242Q, #endif #if MAXBUTWIDTH >= 7 0.9987954562051723927007702841240899260811Q, -0.04906767432741801425355085940205324135377Q, 0.6715589548470184006194634573905233310143Q, -0.7409511253549590911932944126139233276263Q, 0.9039892931234433315823215138173907234886Q, -0.427555093430282094315230886905077056781Q, 0.336889853392220050702686798271834334173Q, -0.9415440651830207783906830087961026265475Q, 0.9700312531945439926159106824865574481009Q, -0.2429801799032638899447731489766866275204Q, 0.5141027441932217266072797923204262815489Q, -0.8577286100002720698929313536407192941624Q, 0.8032075314806449097991200569701675249235Q, -0.5956993044924333434615715265891822127742Q, 0.1467304744553617516588479505190711904561Q, -0.9891765099647809734561415551112872890371Q, 0.9891765099647809734561415551112872890371Q, -0.1467304744553617516588479505190711904561Q, 0.5956993044924333434615715265891822127742Q, -0.8032075314806449097991200569701675249235Q, 0.8577286100002720698929313536407192941624Q, 
-0.5141027441932217266072797923204262815489Q, 0.2429801799032638899447731489766866275204Q, -0.9700312531945439926159106824865574481009Q, 0.9415440651830207783906830087961026265475Q, -0.336889853392220050702686798271834334173Q, 0.427555093430282094315230886905077056781Q, -0.9039892931234433315823215138173907234886Q, 0.7409511253549590911932944126139233276263Q, -0.6715589548470184006194634573905233310143Q, 0.04906767432741801425355085940205324135377Q, -0.9987954562051723927007702841240899260811Q, #endif }; #else real CTBL[] = { 0.7071067811865475243818940365159164684883L, -0.7071067811865475243818940365159164684883L, 0.9238795325112867561014214079495587839119L, -0.382683432365089771723257530688933059082L, 0.382683432365089771723257530688933059082L, -0.9238795325112867561014214079495587839119L, #if MAXBUTWIDTH >= 5 0.9807852804032304491190993878113602022495L, -0.1950903220161282678433729148581576851029L, 0.5555702330196022247573058028269343822103L, -0.8314696123025452370808655033762590846891L, 0.8314696123025452370808655033762590846891L, -0.5555702330196022247573058028269343822103L, 0.1950903220161282678433729148581576851029L, -0.9807852804032304491190993878113602022495L, #endif #if MAXBUTWIDTH >= 6 0.9951847266721968862310254699821143731242L, -0.09801714032956060199569840382660679267701L, 0.6343932841636454982026105398063009488396L, -0.7730104533627369607965383602188325085081L, 0.881921264348355029715105513066220055407L, -0.4713967368259976485449225247492677226546L, 0.2902846772544623676448431737195932100803L, -0.9569403357322088649310892760624369657307L, 0.9569403357322088649310892760624369657307L, -0.2902846772544623676448431737195932100803L, 0.4713967368259976485449225247492677226546L, -0.881921264348355029715105513066220055407L, 0.7730104533627369607965383602188325085081L, -0.6343932841636454982026105398063009488396L, 0.09801714032956060199569840382660679267701L, -0.9951847266721968862310254699821143731242L, #endif #if MAXBUTWIDTH >= 7 
0.9987954562051723927007702841240899260811L, -0.04906767432741801425355085940205324135377L, 0.6715589548470184006194634573905233310143L, -0.7409511253549590911932944126139233276263L, 0.9039892931234433315823215138173907234886L, -0.427555093430282094315230886905077056781L, 0.336889853392220050702686798271834334173L, -0.9415440651830207783906830087961026265475L, 0.9700312531945439926159106824865574481009L, -0.2429801799032638899447731489766866275204L, 0.5141027441932217266072797923204262815489L, -0.8577286100002720698929313536407192941624L, 0.8032075314806449097991200569701675249235L, -0.5956993044924333434615715265891822127742L, 0.1467304744553617516588479505190711904561L, -0.9891765099647809734561415551112872890371L, 0.9891765099647809734561415551112872890371L, -0.1467304744553617516588479505190711904561L, 0.5956993044924333434615715265891822127742L, -0.8032075314806449097991200569701675249235L, 0.8577286100002720698929313536407192941624L, -0.5141027441932217266072797923204262815489L, 0.2429801799032638899447731489766866275204L, -0.9700312531945439926159106824865574481009L, 0.9415440651830207783906830087961026265475L, -0.336889853392220050702686798271834334173L, 0.427555093430282094315230886905077056781L, -0.9039892931234433315823215138173907234886L, 0.7409511253549590911932944126139233276263L, -0.6715589548470184006194634573905233310143L, 0.04906767432741801425355085940205324135377L, -0.9987954562051723927007702841240899260811L, #endif }; #endif #ifndef ENABLE_STREAM #error ENABLE_STREAM not defined #endif static const int constK[] = { 0, 2, 6, 14, 38, 94, 230, 542, 1254 }; extern const char *configStr[]; extern int planFilePathSet; // Utility functions static jmp_buf sigjmp; static void sighandler(int signum) { longjmp(sigjmp, 1); } static int checkISAAvailability(int isa) { signal(SIGILL, sighandler); if (setjmp(sigjmp) == 0) { int ret = GETINT[isa] != NULL && (*GETINT[isa])(BASETYPEID); signal(SIGILL, SIG_DFL); return ret; } signal(SIGILL, SIG_DFL); return 0; } 
#ifdef _OPENMP static int omp_thread_count() { int n = 0; #pragma omp parallel reduction(+:n) n += 1; return n; } #endif static void startAllThreads(const int nth) { #ifdef _OPENMP volatile int8_t *state = calloc(nth, 1); int th; #pragma omp parallel for for(th=0;thlog2len; if (level == N) { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { void (*func)(real *, const real *, const int) = DFTF[config][p->isa][N]; (*func)(d, s, log2len-N); } else { void (*func)(real *, const real *, const int) = DFTB[config][p->isa][N]; (*func)(d, s, log2len-N); } } else if (level == log2len) { assert(p->vecwidth <= (1 << N)); if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { void (*func)(real *, uint32_t *, const real *, const int, const real *, const int) = TBUTF[config][p->isa][N]; (*func)(d, p->perm[level], s, log2len-N, p->tbl[N][level], K); } else { void (*func)(real *, uint32_t *, const real *, const int, const real *, const int) = TBUTB[config][p->isa][N]; (*func)(d, p->perm[level], s, log2len-N, p->tbl[N][level], K); } } else { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { void (*func)(real *, uint32_t *, const int, const real *, const int, const real *, const int) = BUTF[config][p->isa][N]; (*func)(d, p->perm[level], log2len-level, s, log2len-N, p->tbl[N][level], K); } else { void (*func)(real *, uint32_t *, const int, const real *, const int, const real *, const int) = BUTB[config][p->isa][N]; (*func)(d, p->perm[level], log2len-level, s, log2len-N, p->tbl[N][level], K); } } } // Transposer #if defined(__GNUC__) && __GNUC__ < 5 // This is another workaround of a bug in gcc-4 #define LOG2BS 3 #else #define LOG2BS 4 #endif #define BS (1 << LOG2BS) #define TRANSPOSE_BLOCK(y2) do { \ for(int x2=y2+1;x2= N-1) return cnt; const int level = levelorg - levelinc; if (bot - top > 4) { const int bl = 1 << (N - levelinc); const int w = bl/4; for(int j=0;j<(bot-top)/bl;j++) { for(int i=0;i> 1) | ((k & 0x55555555) << 1)); r = (((r & 0xcccccccc) >> 2) | ((r & 0x33333333) << 2)); r = (((r & 0xf0f0f0f0) 
>> 4) | ((r & 0x0f0f0f0f) << 4)); r = (((r & 0xff00ff00) >> 8) | ((r & 0x00ff00ff) << 8)); r = ((r >> 16) | (r << 16)) >> (32-nbits); return (((r << s) | (k & ~(-1 << s))) & ~(-1 << d)) | ((((k >> s) | (r & (-1 << (nbits-s)))) << d) & ~(-1 << nbits)); } static real **makeTable(int sign, int vecwidth, int log2len, const int N, const int K) { if (log2len < N) return NULL; int *p = (int *)malloc(sizeof(int)*((N+1)<bestTime = tm; for(uint32_t j = 0;j < p->log2len+1;j++) { p->bestPathConfig[j] = pathConfig[j]; p->bestPath[j] = path[j]; } return nTrial; } if (level < 1) return nTrial-1; for(int i=0;i<10;i++) { int N; do { N = 1 + rand() % MAXBUTWIDTH; } while(p->tm[0][level*(MAXBUTWIDTH+1)+N] >= 1ULL << 60); if (p->vecwidth > (1 << N) || N == p->log2len) continue; path[level] = N; for(;;) { pathConfig[level] = rand() % CONFIGMAX; #if ENABLE_STREAM == 0 pathConfig[level] &= ~1; #endif if ((p->mode2 & SLEEF_MODE2_MT1D) == 0 && (pathConfig[level] & CONFIG_MT) != 0) continue; break; } for(int j = level-1;j >= 0;j--) path[j] = 0; nTrial = searchForRandomPathRecurse(p, level - N, path, pathConfig, 0, nTrial); if (nTrial <= 0) break; if (p->bestTime < 1ULL << 60) break; } return nTrial - 1; } // Planner #define NSHORTESTPATHS 15 #define MAXPATHLEN (MAXLOG2LEN+1) #define POSMAX (CONFIGMAX * MAXLOG2LEN * (MAXBUTWIDTH+1)) static int cln2pos(int config, int level, int N) { return (config * MAXLOG2LEN + level) * MAXBUTWIDTH + N; } static int pos2config(int pos) { return pos == -1 ? -1 : ((pos - 1) / (MAXBUTWIDTH * MAXLOG2LEN)); } static int pos2level(int pos) { return pos == -1 ? -1 : (((pos - 1) / MAXBUTWIDTH) % MAXLOG2LEN); } static int pos2N(int pos) { return pos == -1 ? 
-1 : ((pos - 1) % MAXBUTWIDTH + 1); } typedef struct { SleefDFT *p; int countu[POSMAX]; int path[NSHORTESTPATHS][MAXPATHLEN]; int pathLen[NSHORTESTPATHS]; uint64_t cost[NSHORTESTPATHS]; int nPaths; int *heap; int *heapLen; uint64_t *heapCost; int heapSize, nPathsInHeap; } ks_t; static ks_t *ksInit(SleefDFT *p) { ks_t *q = calloc(1, sizeof(ks_t)); q->p = p; q->heapSize = 10; q->heap = calloc(q->heapSize, sizeof(int)*MAXPATHLEN); q->heapCost = calloc(q->heapSize, sizeof(uint64_t)); q->heapLen = calloc(q->heapSize, sizeof(int)); return q; } static void ksDispose(ks_t *q) { free(q->heapCost); free(q->heapLen); free(q->heap); free(q); } // returns the number of paths in the heap static int ksSize(ks_t *q) { return q->nPathsInHeap; } // adds a path to the heap static void ksAddPath(ks_t *q, int *path, int pathLen, uint64_t cost) { assert(pathLen <= MAXPATHLEN); if (q->nPathsInHeap == q->heapSize) { q->heapSize *= 2; q->heap = realloc(q->heap, q->heapSize * sizeof(int)*MAXPATHLEN); q->heapCost = realloc(q->heapCost, q->heapSize * sizeof(uint64_t)); q->heapLen = realloc(q->heapLen, q->heapSize * sizeof(int)); } for(int i=0;iheap[q->nPathsInHeap * MAXPATHLEN + i] = path[i]; q->heapLen[q->nPathsInHeap] = pathLen; q->heapCost[q->nPathsInHeap] = cost; q->nPathsInHeap++; } // returns the cost of n-th paths in the heap static uint64_t ksCost(ks_t *q, int n) { assert(0 <= n && n < q->nPathsInHeap); return q->heapCost[n]; } // copies the n-th paths in the heap to path, returns its length static int ksGetPath(ks_t *q, int *path, int n) { assert(0 <= n && n < q->nPathsInHeap); int len = q->heapLen[n]; for(int i=0;iheap[n * MAXPATHLEN + i]; return len; } // removes the n-th paths in the heap static void ksRemove(ks_t *q, int n) { assert(0 <= n && n < q->nPathsInHeap); for(int i=n;inPathsInHeap-1;i++) { int len = q->heapLen[i+1]; assert(len < MAXPATHLEN); for(int j=0;jheap[i * MAXPATHLEN + j] = q->heap[(i+1) * MAXPATHLEN + j]; q->heapLen[i] = q->heapLen[i+1]; q->heapCost[i] = 
q->heapCost[i+1]; } q->nPathsInHeap--; } // returns the countu value at pos static int ksCountu(ks_t *q, int pos) { assert(0 <= pos && pos < POSMAX); return q->countu[pos]; } // set the countu value at pos to n static void ksSetCountu(ks_t *q, int pos, int n) { assert(0 <= pos && pos < POSMAX); q->countu[pos] = n; } // adds a path as one of the best k paths, returns the number best paths static int ksAddBestPath(ks_t *q, int *path, int pathLen, uint64_t cost) { assert(pathLen <= MAXPATHLEN); assert(q->nPaths < NSHORTESTPATHS); for(int i=0;ipath[q->nPaths][i] = path[i]; q->pathLen[q->nPaths] = pathLen; q->cost[q->nPaths] = cost; q->nPaths++; return q->nPaths; } // returns if pos is a destination static int ksIsDest(ks_t *q, int pos) { return pos2level(pos) == 0; } // returns n-th adjacent nodes at pos. static int ksAdjacent(ks_t *q, int pos, int n) { if (pos != -1 && pos2level(pos) == 0) return -1; int NMAX = MIN(MIN(q->p->log2len, MAXBUTWIDTH+1), q->p->log2len - q->p->log2vecwidth + 1); if (pos == -1) { int N = n / 2 + MAX(q->p->log2vecwidth, 1); if (N >= NMAX) return -1; return cln2pos((n & 1) * CONFIG_MT, q->p->log2len, N); } int config = (pos2config(pos) & CONFIG_MT); int N = n + 1; int level = pos2level(pos) - pos2N(pos); if (level < 0 || N >= NMAX) return -1; if (level == 0) return n == 0 ? 
cln2pos(0, 0, 0) : -1; return cln2pos(config, level, N); } static uint64_t ksAdjacentCost(ks_t *q, int pos, int n) { int nxpos = ksAdjacent(q, pos, n); if (nxpos == -1) return 0; int config = pos2config(nxpos), level = pos2level(nxpos), N = pos2N(nxpos); uint64_t ret0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t ret1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; return MIN(ret0, ret1); } static void searchForBestPath(SleefDFT *p) { ks_t *q = ksInit(p); for(int i=0;;i++) { int v = ksAdjacent(q, -1, i); if (v == -1) break; uint64_t c = ksAdjacentCost(q, -1, i); int path[1] = { v }; ksAddPath(q, path, 1, c); } while(ksSize(q) != 0) { uint64_t bestCost = 1ULL << 60; int bestPathNum = -1; for(int i=0;i= NSHORTESTPATHS) continue; ksSetCountu(q, lastPos, ksCountu(q, lastPos)+1); if (ksIsDest(q, lastPos)) { if (ksAddBestPath(q, path, pathLen, cost) >= NSHORTESTPATHS) break; continue; } for(int i=0;;i++) { int v = ksAdjacent(q, lastPos, i); if (v == -1) break; assert(0 <= pos2N(v) && pos2N(v) <= q->p->log2len); uint64_t c = ksAdjacentCost(q, lastPos, i); path[pathLen] = v; ksAddPath(q, path, pathLen+1, cost + c); } } for(int j = p->log2len;j >= 0;j--) p->bestPath[j] = 0; if (((p->mode & SLEEF_MODE_MEASURE) != 0 || (planFilePathSet && (p->mode & SLEEF_MODE_MEASUREBITS) == 0))) { uint64_t besttm = 1ULL << 62; int bestPath = -1; const int niter = 1 + 5000000 / ((1 << p->log2len) + 1); real *s2 = NULL, *d2 = NULL; const real *s = p->in == NULL ? (s2 = (real *)memset(Sleef_malloc((2 << p->log2len) * sizeof(real)), 0, sizeof(real) * (2 << p->log2len))) : p->in; real *d = p->out == NULL ? 
(d2 = (real *)memset(Sleef_malloc((2 << p->log2len) * sizeof(real)), 0, sizeof(real) * (2 << p->log2len))) : p->out; #ifdef _OPENMP const int tn = omp_get_thread_num(); #else const int tn = 0; #endif real *t[] = { p->x1[tn], p->x0[tn], d }; for(int mt=0;mt<2;mt++) { for(int i=q->nPaths-1;i>=0;i--) { if (((pos2config(q->path[i][0]) & CONFIG_MT) != 0) != mt) continue; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { for(int j=0;jpathLen[i];j++) { int N = pos2N(q->path[i][j]); int level = pos2level(q->path[i][j]); int config = pos2config(q->path[i][j]) & ~1; uint64_t t0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t t1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; config = t0 < t1 ? config : (config | 1); if (N != 0) printf("%d(%s) ", N, configStr[config]); } } if (mt) startAllThreads(p->nThread); uint64_t tm0 = Sleef_currentTimeMicros(); for(int k=0;kpathLen & 1) == 1) nb = -1; for(int level = p->log2len, j=0;level >= 1;j++) { assert(pos2level(q->path[i][j]) == level); int N = pos2N(q->path[i][j]); int config = pos2config(q->path[i][j]) & ~1; uint64_t t0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t t1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; config = t0 < t1 ? config : (config | 1); dispatch(p, N, t[nb+1], lb, level, config); level -= N; lb = t[nb+1]; nb = (nb + 1) & 1; } } uint64_t tm1 = Sleef_currentTimeMicros(); for(int k=0;kpathLen & 1) == 1) nb = -1; for(int level = p->log2len, j=0;level >= 1;j++) { assert(pos2level(q->path[i][j]) == level); int N = pos2N(q->path[i][j]); int config = pos2config(q->path[i][j]) & ~1; uint64_t t0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t t1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; config = t0 < t1 ? 
config : (config | 1); dispatch(p, N, t[nb+1], lb, level, config); level -= N; lb = t[nb+1]; nb = (nb + 1) & 1; } } uint64_t tm2 = Sleef_currentTimeMicros(); if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf(" : %lld %lld\n", (long long int)(tm1 - tm0), (long long int)(tm2 - tm1)); if ((tm1 - tm0) < besttm) { bestPath = i; besttm = tm1 - tm0; } if ((tm2 - tm1) < besttm) { bestPath = i; besttm = tm2 - tm1; } } } for(int level = p->log2len, j=0;level >= 1;j++) { assert(pos2level(q->path[bestPath][j]) == level); int N = pos2N(q->path[bestPath][j]); int config = pos2config(q->path[bestPath][j]) & ~1; uint64_t t0 = q->p->tm[config | 0][level*(MAXBUTWIDTH+1) + N]; uint64_t t1 = q->p->tm[config | 1][level*(MAXBUTWIDTH+1) + N]; config = t0 < t1 ? config : (config | 1); p->bestPath[level] = N; p->bestPathConfig[level] = config; level -= N; } if (d2 != NULL) Sleef_free(d2); if (s2 != NULL) Sleef_free(s2); } else { for(int level = p->log2len, j=0;level >= 1;j++) { int bestPath = 0; assert(pos2level(q->path[bestPath][j]) == level); int N = pos2N(q->path[bestPath][j]); int config = pos2config(q->path[bestPath][j]); p->bestPath[level] = N; p->bestPathConfig[level] = config; level -= N; } } ksDispose(q); } // static uint64_t estimate(int log2len, int level, int N, int config) { uint64_t ret = N * 1000 + ABS(N-3) * 1000; if (log2len >= 14 && (config & CONFIG_MT) != 0) ret /= 2; return ret; } static void measureBut(SleefDFT *p) { if (p->x0 == NULL) return; // #ifdef _OPENMP const int tn = omp_get_thread_num(); #else const int tn = 0; #endif real *s = (real *)memset(p->x0[tn], 0, sizeof(real) * (2 << p->log2len)); real *d = (real *)memset(p->x1[tn], 0, sizeof(real) * (2 << p->log2len)); const int niter = 1 + 100000 / ((1 << p->log2len) + 1); #define MEASURE_REPEAT 4 for(int rep=1;rep<=MEASURE_REPEAT;rep++) { for(int config=0;configmode2 & SLEEF_MODE2_MT1D) == 0 && (config & CONFIG_MT) != 0) continue; for(uint32_t level = p->log2len;level >= 1;level--) { for(uint32_t 
N=1;N<=MAXBUTWIDTH;N++) { if (level < N || p->log2len <= N) continue; if (level == N) { if ((int)p->log2len - (int)level < p->log2vecwidth) continue; uint64_t tm = Sleef_currentTimeMicros(); for(int i=0;itm[config][level*(MAXBUTWIDTH+1)+N] = MIN(p->tm[config][level*(MAXBUTWIDTH+1)+N], tm); } else if (level == p->log2len) { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > (1 << N)) continue; if ((config & CONFIG_MT) != 0) { int i1; #ifdef _OPENMP #pragma omp parallel for #endif for(i1=0;i1 < (1 << (p->log2len-N-p->log2vecwidth));i1++) { int i0 = i1 << p->log2vecwidth; p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } } else { for(int i0=0, i1=0;i0 < (1 << (p->log2len-N));i0+=p->vecwidth, i1++) { p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } } uint64_t tm = Sleef_currentTimeMicros(); for(int i=0;itm[config][level*(MAXBUTWIDTH+1)+N] = MIN(p->tm[config][level*(MAXBUTWIDTH+1)+N], tm); } else { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > 2 && p->log2len <= N+2) continue; if ((int)p->log2len - (int)level < p->log2vecwidth) continue; if ((config & CONFIG_MT) != 0) { int i1; #ifdef _OPENMP #pragma omp parallel for #endif for(i1=0;i1 < (1 << (p->log2len-N-p->log2vecwidth));i1++) { int i0 = i1 << p->log2vecwidth; p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } } else { for(int i0=0, i1=0;i0 < (1 << (p->log2len-N));i0+=p->vecwidth, i1++) { p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } } uint64_t tm = Sleef_currentTimeMicros(); for(int i=0;itm[config][level*(MAXBUTWIDTH+1)+N] = MIN(p->tm[config][level*(MAXBUTWIDTH+1)+N], tm); } } } } } if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { for(uint32_t level = p->log2len;level >= 1;level--) { for(uint32_t N=1;N<=MAXBUTWIDTH;N++) { if (level < N || p->log2len <= N) continue; if (level == N) { if ((int)p->log2len - 
(int)level < p->log2vecwidth) continue; printf("bot %d, %d, %d, ", p->log2len, level, N); for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] == 1ULL << 60) { printf("N/A, "); } else { printf("%lld, ", (long long int)p->tm[config][level*(MAXBUTWIDTH+1)+N]); } } printf("\n"); } else if (level == p->log2len) { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > (1 << N)) continue; printf("top %d, %d, %d, ", p->log2len, level, N); for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] == 1ULL << 60) { printf("N/A, "); } else { printf("%lld, ", (long long int)p->tm[config][level*(MAXBUTWIDTH+1)+N]); } } printf("\n"); } else { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > 2 && p->log2len <= N+2) continue; if ((int)p->log2len - (int)level < p->log2vecwidth) continue; printf("mid %d, %d, %d, ", p->log2len, level, N); for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] == 1ULL << 60) { printf("N/A, "); } else { printf("%lld, ", (long long int)p->tm[config][level*(MAXBUTWIDTH+1)+N]); } } printf("\n"); } } } } } static void estimateBut(SleefDFT *p) { for(uint32_t level = p->log2len;level >= 1;level--) { for(uint32_t N=1;N<=MAXBUTWIDTH;N++) { if (level < N || p->log2len <= N) continue; if (level == N) { if ((int)p->log2len - (int)level < p->log2vecwidth) continue; for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] = estimate(p->log2len, level, N, config); } } else if (level == p->log2len) { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > (1 << N)) continue; for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] = estimate(p->log2len, level, N, config); } } else { if (p->tbl[N] == NULL || p->tbl[N][level] == NULL) continue; if (p->vecwidth > 2 && p->log2len <= N+2) continue; if ((int)p->log2len - (int)level < p->log2vecwidth) continue; for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] = estimate(p->log2len, level, N, config); } } } } } static int 
measure(SleefDFT *p, int randomize) { if (p->log2len == 1) { p->bestTime = 1ULL << 60; p->pathLen = 1; p->bestPath[1] = 1; return 1; } if (PlanManager_loadMeasurementResultsP(p, (p->mode & SLEEF_MODE_NO_MT) != 0 ? 1 : 0)) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { printf("Path(loaded) : "); for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) printf("%d(%s) ", p->bestPath[j], configStr[p->bestPathConfig[j]]); printf("\n"); } return 1; } int toBeSaved = 0; for(uint32_t level = p->log2len;level >= 1;level--) { for(uint32_t N=1;N<=MAXBUTWIDTH;N++) { for(int config=0;configtm[config][level*(MAXBUTWIDTH+1)+N] = 1ULL << 60; } } } if (((p->mode & SLEEF_MODE_MEASURE) != 0 || (planFilePathSet && (p->mode & SLEEF_MODE_MEASUREBITS) == 0)) && !randomize) { measureBut(p); toBeSaved = 1; } else { estimateBut(p); } int executable = 0; for(int i=1;i<=MAXBUTWIDTH && !executable;i++) { if (p->tm[0][p->log2len*(MAXBUTWIDTH+1)+i] < (1ULL << 60)) executable = 1; } if (!executable) return 0; p->bestTime = 1ULL << 60; p->bestPath[p->log2len] = 0; if (!randomize) { searchForBestPath(p); } else { int path[MAXLOG2LEN+1]; int pathConfig[MAXLOG2LEN+1]; for(int j = p->log2len;j >= 0;j--) path[j] = pathConfig[j] = 0; int nTrial = 100000; do { nTrial = searchForRandomPathRecurse(p, p->log2len, path, pathConfig, 0, nTrial); } while(p->bestTime == 1ULL << 60 && nTrial >= 0); } if (p->bestPath[p->log2len] == 0) return 0; p->pathLen = 0; for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) p->pathLen++; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { printf("Path"); if (randomize) printf("(random) :"); else if (toBeSaved) printf("(measured) :"); else printf("(estimated) :"); for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) printf("%d(%s) ", p->bestPath[j], configStr[p->bestPathConfig[j]]); printf("\n"); } if (toBeSaved) { PlanManager_saveMeasurementResultsP(p, (p->mode & SLEEF_MODE_NO_MT) != 0 ? 
1 : 0); } return 1; } static void measureTranspose(SleefDFT *p) { if (PlanManager_loadMeasurementResultsT(p)) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose NoMT(loaded): %lld\n", (long long int)p->tmNoMT); if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose MT(loaded): %lld\n", (long long int)p->tmMT); return; } if ((p->mode & SLEEF_MODE_MEASURE) == 0 && (!planFilePathSet || (p->mode & SLEEF_MODE_MEASUREBITS) != 0)) { if (p->log2hlen + p->log2vlen >= 14) { p->tmNoMT = 20; p->tmMT = 10; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose : selected MT(estimated)\n"); } else { p->tmNoMT = 10; p->tmMT = 20; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose : selected NoMT(estimated)\n"); } return; } real *tBuf2 = (real *)Sleef_malloc(sizeof(real)*2*p->hlen*p->vlen); const int niter = 1 + 5000000 / (p->hlen * p->vlen + 1); uint64_t tm; tm = Sleef_currentTimeMicros(); for(int i=0;itBuf, p->log2hlen, p->log2vlen); transpose(tBuf2, p->tBuf, p->log2vlen, p->log2hlen); } p->tmNoMT = Sleef_currentTimeMicros() - tm + 1; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose NoMT(measured): %lld\n", (long long int)p->tmNoMT); #ifdef _OPENMP tm = Sleef_currentTimeMicros(); for(int i=0;itBuf, p->log2hlen, p->log2vlen); transposeMT(tBuf2, p->tBuf, p->log2vlen, p->log2hlen); } p->tmMT = Sleef_currentTimeMicros() - tm + 1; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("transpose MT(measured): %lld\n", (long long int)p->tmMT); #else p->tmMT = p->tmNoMT*2; #endif Sleef_free(tBuf2); PlanManager_saveMeasurementResultsT(p); } // Implementation of SleefDFT_*_init1d EXPORT SleefDFT *INIT(uint32_t n, const real *in, real *out, uint64_t mode) { SleefDFT *p = (SleefDFT *)calloc(1, sizeof(SleefDFT)); p->magic = MAGIC; p->baseTypeID = BASETYPEID; p->in = (const void *)in; p->out = (void *)out; // Mode p->mode = mode; if ((p->mode & SLEEF_MODE_NO_MT) == 0) { p->mode2 |= SLEEF_MODE2_MT1D; } if ((mode & SLEEF_MODE_REAL) != 0) n /= 2; p->log2len = 
ilog2(n); if (p->log2len <= 1) return p; if ((mode & SLEEF_MODE_ALT) != 0) p->mode = mode = mode ^ SLEEF_MODE_BACKWARD; #ifdef _OPENMP p->nThread = omp_thread_count(); #else p->nThread = 1; p->mode2 &= ~SLEEF_MODE2_MT1D; #endif // ISA availability int bestPriority = -1; p->isa = -1; for(int i=0;i= (*GETINT[i])(GETINT_VECWIDTH) * (*GETINT[i])(GETINT_VECWIDTH)) { bestPriority = (*GETINT[i])(GETINT_DFTPRIORITY); p->isa = i; } } if (p->isa == -1) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("ISA not available\n"); p->magic = 0; free(p); return NULL; } // Tables p->perm = (uint32_t **)calloc(sizeof(uint32_t *), p->log2len+1); for(int level = p->log2len;level >= 1;level--) { p->perm[level] = (uint32_t *)Sleef_malloc(sizeof(uint32_t) * ((1 << p->log2len) + 8)); } p->x0 = malloc(sizeof(real *) * p->nThread); p->x1 = malloc(sizeof(real *) * p->nThread); for(int i=0;inThread;i++) { p->x0[i] = (real *)Sleef_malloc(sizeof(real) * 2 * n); p->x1[i] = (real *)Sleef_malloc(sizeof(real) * 2 * n); } if ((mode & SLEEF_MODE_REAL) != 0) { p->rtCoef0 = (real *)Sleef_malloc(sizeof(real) * n); p->rtCoef1 = (real *)Sleef_malloc(sizeof(real) * n); if ((mode & SLEEF_MODE_BACKWARD) == 0) { for(uint32_t i=0;irtCoef0)[i*2+0] = ((real *)p->rtCoef0)[i*2+1] = (real)0.5 - (real)0.5 * sc.x; ((real *)p->rtCoef1)[i*2+0] = ((real *)p->rtCoef1)[i*2+1] = (real)0.5*sc.y; } } else { for(uint32_t i=0;irtCoef0)[i*2+0] = ((real *)p->rtCoef0)[i*2+1] = (real)0.5 + (real)0.5 * sc.x; ((real *)p->rtCoef1)[i*2+0] = ((real *)p->rtCoef1)[i*2+1] = (real)0.5*sc.y; } } } // Measure int sign = (mode & SLEEF_MODE_BACKWARD) != 0 ? 
-1 : 1; p->vecwidth = (*GETINT[p->isa])(GETINT_VECWIDTH); p->log2vecwidth = ilog2(p->vecwidth); for(int i=1;i<=MAXBUTWIDTH;i++) { ((real ***)p->tbl)[i] = makeTable(sign, p->vecwidth, p->log2len, i, constK[i]); } if (!measure(p, (mode & SLEEF_MODE_DEBUG))) { // Fall back to the first ISA freeTables(p); p->isa = 0; p->vecwidth = (*GETINT[p->isa])(GETINT_VECWIDTH); p->log2vecwidth = ilog2(p->vecwidth); for(int i=1;i<=MAXBUTWIDTH;i++) { ((real ***)p->tbl)[i] = makeTable(sign, p->vecwidth, p->log2len, i, constK[i]); } for(int level = p->log2len;level >= 1;) { int N = ABS(p->bestPath[level]); if (level == N) { level -= N; continue; } int i1 = 0; for(int i0=0;i0 < (1 << (p->log2len-N));i0+=p->vecwidth, i1++) { p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } for(;i1 < (1 << p->log2len) + 8;i1++) p->perm[level][i1] = 0; level -= N; } if (!measure(p, (mode & SLEEF_MODE_DEBUG))) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("Suitable ISA not found. This should not happen.\n"); return NULL; } } for(int level = p->log2len;level >= 1;) { int N = ABS(p->bestPath[level]); if (level == N) { level -= N; continue; } int i1 = 0; for(int i0=0;i0 < (1 << (p->log2len-N));i0+=p->vecwidth, i1++) { p->perm[level][i1] = 2*perm(p->log2len, i0, p->log2len-level, p->log2len-(level-N)); } for(;i1 < (1 << p->log2len) + 8;i1++) p->perm[level][i1] = 0; level -= N; } if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("ISA : %s %d bit %s\n", (char *)(*GETPTR[p->isa])(0), (int)(GETINT[p->isa](GETINT_VECWIDTH) * sizeof(real) * 16), BASETYPESTRING); return p; } // Implementation of SleefDFT_*_init2d EXPORT SleefDFT *INIT2D(uint32_t vlen, uint32_t hlen, const real *in, real *out, uint64_t mode) { SleefDFT *p = (SleefDFT *)calloc(1, sizeof(SleefDFT)); p->magic = MAGIC2D; p->mode = mode; p->baseTypeID = BASETYPEID; p->in = in; p->out = out; p->hlen = hlen; p->log2hlen = ilog2(hlen); p->vlen = vlen; p->log2vlen = ilog2(vlen); uint64_t mode1D = mode; mode1D |= 
SLEEF_MODE_NO_MT; if ((mode & SLEEF_MODE_NO_MT) == 0) p->mode3 |= SLEEF_MODE3_MT2D; p->instH = p->instV = INIT(hlen, NULL, NULL, mode1D); if (hlen != vlen) p->instV = INIT(vlen, NULL, NULL, mode1D); p->tBuf = (void *)Sleef_malloc(sizeof(real)*2*hlen*vlen); measureTranspose(p); return p; } // Implementation of SleefDFT_*_execute EXPORT void EXECUTE(SleefDFT *p, const real *s0, real *d0) { assert(p != NULL && (p->magic == MAGIC || p->magic == MAGIC2D)); const real *s = s0 == NULL ? p->in : s0; real *d = d0 == NULL ? p->out : d0; if (p->magic == MAGIC2D) { // S -> T -> D -> T -> D real *tBuf = (real *)(p->tBuf); #ifdef _OPENMP if ((p->mode3 & SLEEF_MODE3_MT2D) != 0 && (((p->mode & SLEEF_MODE_DEBUG) == 0 && p->tmMT < p->tmNoMT) || ((p->mode & SLEEF_MODE_DEBUG) != 0 && (rand() & 1)))) { int y; #pragma omp parallel for for(y=0;yvlen;y++) { EXECUTE(p->instH, &s[p->hlen*2*y], &tBuf[p->hlen*2*y]); } transposeMT(d, tBuf, p->log2vlen, p->log2hlen); #pragma omp parallel for for(y=0;yhlen;y++) { EXECUTE(p->instV, &d[p->vlen*2*y], &tBuf[p->vlen*2*y]); } transposeMT(d, tBuf, p->log2hlen, p->log2vlen); } else #endif { for(int y=0;yvlen;y++) { EXECUTE(p->instH, &s[p->hlen*2*y], &tBuf[p->hlen*2*y]); } transpose(d, tBuf, p->log2vlen, p->log2hlen); for(int y=0;yhlen;y++) { EXECUTE(p->instV, &d[p->vlen*2*y], &tBuf[p->vlen*2*y]); } transpose(d, tBuf, p->log2hlen, p->log2vlen); } return; } if (p->log2len <= 1) { if ((p->mode & SLEEF_MODE_REAL) == 0) { real r0 = s[0] + s[2]; real r1 = s[1] + s[3]; real r2 = s[0] - s[2]; real r3 = s[1] - s[3]; d[0] = r0; d[1] = r1; d[2] = r2; d[3] = r3; } else { if ((p->mode & SLEEF_MODE_ALT) == 0) { if (p->log2len == 1) { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { real r0 = s[0] + s[2] + (s[1] + s[3]); real r1 = s[0] + s[2] - (s[1] + s[3]); real r2 = s[0] - s[2]; real r3 = s[3] - s[1]; d[0] = r0; d[1] = 0; d[2] = r2; d[3] = r3; d[4] = r1; d[5] = 0; } else { real r0 = (s[0] + s[4])*(real)0.5 + s[2]; real r1 = (s[0] - s[4])*(real)0.5 - s[3]; real r2 = 
(s[0] + s[4])*(real)0.5 - s[2]; real r3 = (s[0] - s[4])*(real)0.5 + s[3]; d[0] = r0*2; d[1] = r1*2; d[2] = r2*2; d[3] = r3*2; } } else { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { real r0 = s[0] + s[1]; real r1 = s[0] - s[1]; d[0] = r0; d[1] = 0; d[2] = r1; d[3] = 0; } else { real r0 = s[0] + s[2]; real r1 = s[0] - s[2]; d[0] = r0; d[1] = r1; } } } else { if (p->log2len == 1) { if ((p->mode & SLEEF_MODE_BACKWARD) == 0) { real r0 = s[0] + s[2] + (s[1] + s[3]); real r1 = s[0] + s[2] - (s[1] + s[3]); real r2 = s[0] - s[2]; real r3 = s[1] - s[3]; d[0] = r0; d[1] = r1; d[2] = r2; d[3] = r3; } else { real r0 = (s[0] + s[1])*(real)0.5 + s[2]; real r1 = (s[0] - s[1])*(real)0.5 + s[3]; real r2 = (s[0] + s[1])*(real)0.5 - s[2]; real r3 = (s[0] - s[1])*(real)0.5 - s[3]; d[0] = r0; d[1] = r1; d[2] = r2; d[3] = r3; } } else { real c = ((p->mode & SLEEF_MODE_BACKWARD) != 0) ? (real)0.5 : (real)1.0; real r0 = s[0] + s[1]; real r1 = s[0] - s[1]; d[0] = r0 * c; d[1] = r1 * c; } } } return; } // #ifdef _OPENMP const int tn = omp_get_thread_num(); real *t[] = { p->x1[tn], p->x0[tn], d }; #else real *t[] = { p->x1[0], p->x0[0], d }; #endif const real *lb = s; int nb = 0; if ((p->mode & SLEEF_MODE_REAL) != 0 && (p->pathLen & 1) == 0 && ((p->mode & SLEEF_MODE_BACKWARD) != 0) != ((p->mode & SLEEF_MODE_ALT) != 0)) nb = -1; if ((p->mode & SLEEF_MODE_REAL) == 0 && (p->pathLen & 1) == 1) nb = -1; if ((p->mode & SLEEF_MODE_REAL) != 0 && ((p->mode & SLEEF_MODE_BACKWARD) != 0) != ((p->mode & SLEEF_MODE_ALT) != 0)) { (*REALSUB1[p->isa])(t[nb+1], s, p->log2len, p->rtCoef0, p->rtCoef1, (p->mode & SLEEF_MODE_ALT) == 0); if ((p-> mode & SLEEF_MODE_ALT) == 0) t[nb+1][(1 << p->log2len)+1] = -s[(1 << p->log2len)+1] * 2; lb = t[nb+1]; nb = (nb + 1) & 1; } for(int level = p->log2len;level >= 1;) { int N = ABS(p->bestPath[level]), config = p->bestPathConfig[level]; dispatch(p, N, t[nb+1], lb, level, config); level -= N; lb = t[nb+1]; nb = (nb + 1) & 1; } if ((p->mode & SLEEF_MODE_REAL) != 0 && ((p->mode 
& SLEEF_MODE_BACKWARD) == 0) != ((p->mode & SLEEF_MODE_ALT) != 0)) { (*REALSUB0[p->isa])(d, lb, p->log2len, p->rtCoef0, p->rtCoef1); if ((p->mode & SLEEF_MODE_ALT) == 0) { d[(1 << p->log2len)+1] = -d[(1 << p->log2len)+1]; d[(2 << p->log2len)+0] = d[1]; d[(2 << p->log2len)+1] = 0; d[1] = 0; } } } sleef-3.3.1/src/dft/dftcommon.c000066400000000000000000000274751333715643700163450ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #ifdef _OPENMP #include #endif #include "misc.h" #include "sleef.h" #define IMPORT_IS_EXPORT #include "sleefdft.h" #include "dispatchparam.h" #include "dftcommon.h" #include "common.h" #include "arraymap.h" #define MAGIC_FLOAT 0x31415926 #define MAGIC_DOUBLE 0x27182818 #define MAGIC_LONGDOUBLE 0x14142135 #define MAGIC_QUAD 0x33166247 #define MAGIC2D_FLOAT 0x22360679 #define MAGIC2D_DOUBLE 0x17320508 #define MAGIC2D_LONGDOUBLE 0x26457513 #define MAGIC2D_QUAD 0x36055512 const char *configStr[] = { "ST", "ST stream", "MT", "MT stream" }; static int parsePathStr(char *p, int *path, int *config, int pathLenMax, int log2len) { int pathLen = 0, l2l = 0; for(;;) { while(*p == ' ') p++; if (*p == '\0') break; if (!isdigit(*p)) return -1; pathLen++; if (pathLen >= pathLenMax) return -2; int n = 0; while(isdigit(*p)) n = n * 10 + *p++ - '0'; if (n > MAXBUTWIDTH) return -6; path[pathLen-1] = n; l2l += n; config[pathLen-1] = 0; if (*p != '(') continue; int c; for(c=3;c>=0;c--) if (strncmp(p+1, configStr[c], strlen(configStr[c])) == 0) break; if (c == -1) return -3; p += strlen(configStr[c]) + 1; if (*p != ')') return -4; p++; 
config[pathLen-1] = c; } if (l2l != log2len) return -5; return pathLen; } EXPORT void SleefDFT_setPath(SleefDFT *p, char *pathStr) { assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); int path[32], config[32]; int pathLen = parsePathStr(pathStr, path, config, 31, p->log2len); if (pathLen < 0) { if ((p->mode & SLEEF_MODE_VERBOSE) != 0) printf("Error %d in parsing path string : %s\n", pathLen, pathStr); return; } for(uint32_t j = 0;j <= p->log2len;j++) p->bestPath[j] = 0; for(int level = p->log2len, j=0;level > 0 && j < pathLen;) { p->bestPath[level] = path[j]; p->bestPathConfig[level] = config[j]; level -= path[j]; j++; } p->pathLen = 0; for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) p->pathLen++; if ((p->mode & SLEEF_MODE_VERBOSE) != 0) { printf("Set path : "); for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) printf("%d(%s) ", p->bestPath[j], configStr[p->bestPathConfig[j]]); printf("\n"); } } void freeTables(SleefDFT *p) { for(int N=1;N<=MAXBUTWIDTH;N++) { for(uint32_t level=N;level<=p->log2len;level++) { Sleef_free(p->tbl[N][level]); } free(p->tbl[N]); p->tbl[N] = NULL; } } EXPORT void SleefDFT_dispose(SleefDFT *p) { if (p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)) { Sleef_free(p->tBuf); SleefDFT_dispose(p->instH); if (p->hlen != p->vlen) SleefDFT_dispose(p->instV); p->magic = 0; free(p); return; } assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); if (p->log2len <= 1) { p->magic = 0; free(p); return; } if ((p->mode & SLEEF_MODE_REAL) != 0) { Sleef_free(p->rtCoef1); Sleef_free(p->rtCoef0); p->rtCoef0 = p->rtCoef1 = NULL; } for(int level = p->log2len;level >= 1;level--) { Sleef_free(p->perm[level]); } free(p->perm); p->perm = NULL; freeTables(p); p->magic = 0; free(p); } uint32_t 
ilog2(uint32_t q) { static const uint32_t tab[] = {0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4}; uint32_t r = 0,qq; if (q & 0xffff0000) r = 16; q >>= r; qq = q | (q >> 1); qq |= (qq >> 2); qq = ((qq & 0x10) >> 4) | ((qq & 0x100) >> 7) | ((qq & 0x1000) >> 10); return r + tab[qq] * 4 + tab[q >> (tab[qq] * 4)] - 1; } // char *dftPlanFilePath = NULL; char *archID = NULL; uint64_t planMode = SLEEF_PLAN_REFERTOENVVAR; ArrayMap *planMap = NULL; int planFilePathSet = 0, planFileLoaded = 0; #ifdef _OPENMP omp_lock_t planMapLock; int planMapLockInitialized = 0; #endif static void initPlanMapLock() { #ifdef _OPENMP #pragma omp critical { if (!planMapLockInitialized) { planMapLockInitialized = 1; omp_init_lock(&planMapLock); } } #endif } static void planMap_clear() { if (planMap != NULL) ArrayMap_dispose(planMap); planMap = NULL; } EXPORT void SleefDFT_setPlanFilePath(const char *path, const char *arch, uint64_t mode) { initPlanMapLock(); if ((mode & SLEEF_PLAN_RESET) != 0) { planMap_clear(); planFileLoaded = 0; planFilePathSet = 0; } if (dftPlanFilePath != NULL) free(dftPlanFilePath); if (path != NULL) { dftPlanFilePath = malloc(strlen(path)+10); strcpy(dftPlanFilePath, path); } else { dftPlanFilePath = NULL; } if (archID != NULL) free(archID); if (arch == NULL) arch = Sleef_getCpuIdString(); archID = malloc(strlen(arch)+10); strcpy(archID, arch); planMode = mode; planFilePathSet = 1; } static void loadPlanFromFile() { if (planFilePathSet == 0 && (planMode & SLEEF_PLAN_REFERTOENVVAR) != 0) { char *s = getenv(ENVVAR); if (s != NULL) SleefDFT_setPlanFilePath(s, NULL, planMode); } if (planMap != NULL) ArrayMap_dispose(planMap); if (dftPlanFilePath != NULL && (planMode & SLEEF_PLAN_RESET) == 0) { planMap = ArrayMap_load(dftPlanFilePath, archID, PLANFILEID, (planMode & SLEEF_PLAN_NOLOCK) == 0); } if (planMap == NULL) planMap = initArrayMap(); planFileLoaded = 1; } static void savePlanToFile() { assert(planFileLoaded); if ((planMode & SLEEF_PLAN_READONLY) == 0 && dftPlanFilePath != NULL) { 
ArrayMap_save(planMap, dftPlanFilePath, archID, PLANFILEID); } } #define CATBIT 8 #define BASETYPEIDBIT 2 #define LOG2LENBIT 8 #define DIRBIT 1 #define BUTSTATBIT 16 static uint64_t keyButStat(int baseTypeID, int log2len, int dir, int butStat) { dir = (dir & SLEEF_MODE_BACKWARD) == 0; int cat = 0; uint64_t k = 0; k = (k << BUTSTATBIT) | (butStat & ~(~(uint64_t)0 << BUTSTATBIT)); k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << DIRBIT) | (dir & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT)); k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT)); return k; } #define LEVELBIT LOG2LENBIT #define BUTCONFIGBIT 8 #define TRANSCONFIGBIT 8 static uint64_t keyTrans(int baseTypeID, int hlen, int vlen, int transConfig) { int max = MAX(hlen, vlen), min = MIN(hlen, vlen); int cat = 2; uint64_t k = 0; k = (k << TRANSCONFIGBIT) | (transConfig & ~(~(uint64_t)0 << TRANSCONFIGBIT)); k = (k << LOG2LENBIT) | (max & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << LOG2LENBIT) | (min & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT)); k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT)); return k; } static uint64_t keyPath(int baseTypeID, int log2len, int dir, int level, int config) { dir = (dir & SLEEF_MODE_BACKWARD) == 0; int cat = 3; uint64_t k = 0; k = (k << BUTCONFIGBIT) | (config & ~(~(uint64_t)0 << BUTCONFIGBIT)); k = (k << LEVELBIT) | (level & ~(~(uint64_t)0 << LEVELBIT)); k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << DIRBIT) | (dir & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT)); k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT)); return k; } static uint64_t keyPathConfig(int baseTypeID, int log2len, int dir, int level, int config) { dir = (dir & SLEEF_MODE_BACKWARD) == 0; int cat = 4; uint64_t k = 0; k = (k << BUTCONFIGBIT) | (config & 
~(~(uint64_t)0 << BUTCONFIGBIT)); k = (k << LEVELBIT) | (level & ~(~(uint64_t)0 << LEVELBIT)); k = (k << LOG2LENBIT) | (log2len & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << DIRBIT) | (dir & ~(~(uint64_t)0 << LOG2LENBIT)); k = (k << BASETYPEIDBIT) | (baseTypeID & ~(~(uint64_t)0 << BASETYPEIDBIT)); k = (k << CATBIT) | (cat & ~(~(uint64_t)0 << CATBIT)); return k; } static uint64_t planMap_getU64(uint64_t key) { char *s = ArrayMap_get(planMap, key); if (s == NULL) return 0; uint64_t ret; if (sscanf(s, "%" SCNx64, &ret) != 1) return 0; return ret; } static void planMap_putU64(uint64_t key, uint64_t value) { char *s = malloc(100); sprintf(s, "%" PRIx64, value); s = ArrayMap_put(planMap, key, s); if (s != NULL) free(s); } int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat) { assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); initPlanMapLock(); #ifdef _OPENMP omp_set_lock(&planMapLock); #endif if (!planFileLoaded) loadPlanFromFile(); int stat = planMap_getU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10)); if (stat == 0) { #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif return 0; } int ret = 1; for(int j = p->log2len;j >= 0;j--) { p->bestPath[j] = planMap_getU64(keyPath(p->baseTypeID, p->log2len, p->mode, j, pathCat)); p->bestPathConfig[j] = planMap_getU64(keyPathConfig(p->baseTypeID, p->log2len, p->mode, j, pathCat)); if (p->bestPath[j] > MAXBUTWIDTH) ret = 0; } p->pathLen = 0; for(int j = p->log2len;j >= 0;j--) if (p->bestPath[j] != 0) p->pathLen++; #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif return ret; } void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat) { assert(p != NULL && (p->magic == MAGIC_FLOAT || p->magic == MAGIC_DOUBLE || p->magic == MAGIC_LONGDOUBLE || p->magic == MAGIC_QUAD)); initPlanMapLock(); #ifdef _OPENMP omp_set_lock(&planMapLock); #endif if (!planFileLoaded) loadPlanFromFile(); if 
(planMap_getU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10)) != 0) { #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif return; } for(int j = p->log2len;j >= 0;j--) { planMap_putU64(keyPath(p->baseTypeID, p->log2len, p->mode, j, pathCat), p->bestPath[j]); planMap_putU64(keyPathConfig(p->baseTypeID, p->log2len, p->mode, j, pathCat), p->bestPathConfig[j]); } planMap_putU64(keyButStat(p->baseTypeID, p->log2len, p->mode, pathCat+10), 1); if ((planMode & SLEEF_PLAN_READONLY) == 0) savePlanToFile(); #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif } int PlanManager_loadMeasurementResultsT(SleefDFT *p) { assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)); initPlanMapLock(); int ret = 0; #ifdef _OPENMP omp_set_lock(&planMapLock); #endif if (!planFileLoaded) loadPlanFromFile(); p->tmNoMT = planMap_getU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 0)); p->tmMT = planMap_getU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 1)); #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif return p->tmNoMT != 0; } void PlanManager_saveMeasurementResultsT(SleefDFT *p) { assert(p != NULL && (p->magic == MAGIC2D_FLOAT || p->magic == MAGIC2D_DOUBLE || p->magic == MAGIC2D_LONGDOUBLE || p->magic == MAGIC2D_QUAD)); initPlanMapLock(); int ret = 0; #ifdef _OPENMP omp_set_lock(&planMapLock); #endif if (!planFileLoaded) loadPlanFromFile(); planMap_putU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 0), p->tmNoMT); planMap_putU64(keyTrans(p->baseTypeID, p->log2hlen, p->log2vlen, 1), p->tmMT ); if ((planMode & SLEEF_PLAN_READONLY) == 0) savePlanToFile(); #ifdef _OPENMP omp_unset_lock(&planMapLock); #endif } sleef-3.3.1/src/dft/dftcommon.h000066400000000000000000000032201333715643700163300ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define CONFIGMAX 4 #define CONFIG_STREAM 1 #define CONFIG_MT 2 #define MAXLOG2LEN 32 typedef struct SleefDFT { uint32_t magic; uint64_t mode, mode2, mode3; int baseTypeID; const void *in; void *out; union { struct { uint32_t log2len; void **tbl[MAXBUTWIDTH+1]; void *rtCoef0, *rtCoef1; uint32_t **perm; void **x0, **x1; int isa; int planMode; int vecwidth, log2vecwidth; int nThread; uint64_t tm[CONFIGMAX][(MAXBUTWIDTH+1)*32]; uint64_t bestTime; int16_t bestPath[32], bestPathConfig[32], pathLen; }; struct { int32_t hlen, vlen; int32_t log2hlen, log2vlen; uint64_t tmNoMT, tmMT; struct SleefDFT *instH, *instV; void *tBuf; }; }; } SleefDFT; #define SLEEF_MODE2_MT1D (1 << 0) #define SLEEF_MODE3_MT2D (1 << 0) #define PLANFILEID "SLEEFDFT0\n" #define ENVVAR "SLEEFDFTPLAN" #define SLEEF_MODE_MEASUREBITS (3 << 20) void freeTables(SleefDFT *p); uint32_t ilog2(uint32_t q); //int PlanManager_loadMeasurementResultsB(SleefDFT *p); //void PlanManager_saveMeasurementResultsB(SleefDFT *p, int butStat); int PlanManager_loadMeasurementResultsT(SleefDFT *p); void PlanManager_saveMeasurementResultsT(SleefDFT *p); int PlanManager_loadMeasurementResultsP(SleefDFT *p, int pathCat); void PlanManager_saveMeasurementResultsP(SleefDFT *p, int pathCat); #define GETINT_VECWIDTH 100 #define GETINT_DFTPRIORITY 101 sleef-3.3.1/src/dft/mkdispatch.c000066400000000000000000000142311333715643700164700ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #ifndef ENABLE_STREAM #error ENABLE_STREAM not defined #endif int main(int argc, char **argv) { if (argc < 3) { fprintf(stderr, "Usage : %s ...\n", argv[0]); exit(-1); } const char *basetype = argv[1]; const int maxbutwidth = atoi(argv[2]); const int isastart = 3; const int isamax = argc - isastart; #if ENABLE_STREAM == 1 const int enable_stream = 1; #else const int enable_stream = 0; #endif printf("#define MAXBUTWIDTH %d\n", maxbutwidth); printf("\n"); if (strcmp(basetype, "paramonly") == 0) exit(0); printf("#define ISAMAX %d\n", isamax); printf("#define CONFIGMAX 4\n"); for(int k=isastart;k #include #include #include #define CONFIGMAX 4 char *replaceAll(const char *in, const char *pat, const char *replace) { const int replaceLen = strlen(replace); const int patLen = strlen(pat); char *str = malloc(strlen(in)+1); strcpy(str, in); for(;;) { char *p = strstr(str, pat); if (p == NULL) return str; int replace_pos = p - str; int tail_len = strlen(p + patLen); char *newstr = malloc(strlen(str) + (replaceLen - patLen) + 1); memcpy(newstr, str, replace_pos); memcpy(newstr + replace_pos, replace, replaceLen); memcpy(newstr + replace_pos + replaceLen, str + replace_pos + patLen, tail_len+1); free(str); str = newstr; } return str; } #define LEN 1024 char line[LEN+10]; int main(int argc, char **argv) { if (argc < 2) { fprintf(stderr, "Usage : %s ...\n", argv[0]); exit(-1); } const char *baseType = argv[1]; const int isastart = 2; const int isamax = argc - isastart; const int maxbutwidth = 6; for(int config=0;config> outShift); store(out, (0 << outShift), plus(load(in, (0 << inShift)), load(in, (1 << inShift)))); real2 v4 = minus(load(in, (0 << inShift)), load(in, (1 << inShift))); store(out, (1 << outShift), ctimesminusplus(v4, tbl[0 + tbloffset], ctimes(reverse(v4), tbl[1 + tbloffset]))); } } ALIGNED(8192) void but2b_%CONFIG%_%ISA%(real *RESTRICT 
out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  // (The signature of this 2-input kernel begins on the preceding line.)
  // Iteration i reads two inputs at offsets 0 and 1 << inShift from in0 + 2*i0,
  // writes their sum to offset 0 and their difference, combined with the table
  // entries tbl[tbloffset] and tbl[tbloffset + 1], to offset 1 << outShift of
  // out0 + q[i].
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    store(out, (0 << outShift), plus(load(in, (0 << inShift)), load(in, (1 << inShift))));
    real2 v4 = minus(load(in, (0 << inShift)), load(in, (1 << inShift)));
    store(out, (1 << outShift), ctimesminusplus(v4, tbl[0 + tbloffset], ctimes(reverse(v4), tbl[1 + tbloffset])));
  }
}

// 2-input kernel, scatter variant. Same arithmetic as the kernel above, but
// the results go out through scatter(out, j, 2, ...) and the table is read
// with load() at j * VECWIDTH + tbloffset, where tbloffset = K * i0.
// q[i] is the output offset used by iteration i.
// NOTE(review): the 'f'/'b' suffix presumably means forward/backward - confirm.
ALIGNED(8192) void tbut2f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    scatter(out, 0, 2, plus(load(in, (0 << inShift)), load(in, (1 << inShift))));
    real2 v4 = minus(load(in, (0 << inShift)), load(in, (1 << inShift)));
    scatter(out, 1, 2, timesminusplus(v4, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v4), load(tbl, 1 * VECWIDTH + tbloffset))));
  }
}

// Counterpart of tbut2f with the 'b' suffix; for two inputs the body is
// statement-for-statement the same as tbut2f.
ALIGNED(8192) void tbut2b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    scatter(out, 0, 2, plus(load(in, (0 << inShift)), load(in, (1 << inShift))));
    real2 v4 = minus(load(in, (0 << inShift)), load(in, (1 << inShift)));
    scatter(out, 1, 2, timesminusplus(v4, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v4), load(tbl, 1 * VECWIDTH + tbloffset))));
  }
}

// 4-input kernel without a table; the parameter list continues on the next line.
ALIGNED(8192) void dft4f_%CONFIG%_%ISA%(real *RESTRICT out0,
const real *RESTRICT in0, const int shift) {
  // (The signature of this 4-input kernel begins on the preceding line.)
  // Iteration i reads four inputs at offsets j << shift (j = 0..3) from
  // in0 + 2*i0 and writes four outputs at the same offsets of out0 + 2*i0.
  // No table is involved.
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    real2 v3 = load(in, 1 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v7 = reverse(minus(v3, v5));
    real2 v13 = plus(v3, v5);
    real2 v4 = load(in, 2 << shift);
    real2 v2 = load(in, 0 << shift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    store(out, 3 << shift, minus(uminusplus(v7), v8));
    store(out, 1 << shift, minus(uplusminus(v7), v8));
    store(out, 2 << shift, minus(v12, v13));
    store(out, 0 << shift, plus(v12, v13));
  }
}

// Counterpart of dft4f with the 'b' suffix. Apart from statement order, the
// only difference is the operand order of the difference feeding v7
// (v5 - v3 here, v3 - v5 in dft4f).
// NOTE(review): 'f'/'b' presumably mean forward/backward - confirm.
ALIGNED(8192) void dft4b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    real2 v3 = load(in, 1 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v13 = plus(v3, v5);
    real2 v7 = reverse(minus(v5, v3));
    real2 v4 = load(in, 2 << shift);
    real2 v2 = load(in, 0 << shift);
    real2 v8 = minus(v4, v2);
    store(out, 3 << shift, minus(uminusplus(v7), v8));
    store(out, 1 << shift, minus(uplusminus(v7), v8));
    real2 v12 = plus(v2, v4);
    store(out, 2 << shift, minus(v12, v13));
    store(out, 0 << shift, plus(v12, v13));
  }
}

// 4-input kernel with table. Iteration i reads four inputs at offsets
// j << inShift from in0 + 2*i0 and writes four outputs at offsets
// j << outShift of out0 + q[i]. Outputs 1..3 are combined with the table
// entries tbl[tbloffset + 0..5], where tbloffset = K * (i0 >> outShift).
// The body continues on the next line.
ALIGNED(8192) void but4f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v7 = reverse(minus(v3, v5));
    real2 v13 = plus(v3, v5);
    real2 v2 = load(in, 0 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v8 = minus(v4,
v2);
    // (Remainder of the 4-input table kernel that begins on the preceding line.)
    real2 v12 = plus(v2, v4);
    store(out, 0 << outShift, plus(v12, v13));
    real2 v26 = minus(v12, v13);
    // Output 2 uses table entries 0/1, output 1 entries 2/3, output 3 entries 4/5.
    store(out, 2 << outShift, ctimesminusplus(v26, tbl[0 + tbloffset], ctimes(reverse(v26), tbl[1 + tbloffset])));
    real2 v11 = minusplus(uminus(v7), v8);
    real2 v9 = minusplus(v7, v8);
    store(out, 1 << outShift, ctimesminusplus(reverse(v9), tbl[2 + tbloffset], ctimes(v9, tbl[3 + tbloffset])));
    store(out, 3 << outShift, ctimesminusplus(reverse(v11), tbl[4 + tbloffset], ctimes(v11, tbl[5 + tbloffset])));
  }
}

// Counterpart of but4f with the 'b' suffix: four inputs at offsets
// j << inShift of in0 + 2*i0, four outputs at offsets j << outShift of
// out0 + q[i], table entries tbl[tbloffset + 0..5] with
// tbloffset = K * (i0 >> outShift). The only difference from but4f is the
// operand order of the difference feeding v7 (v5 - v3 here).
// NOTE(review): 'f'/'b' presumably mean forward/backward - confirm.
ALIGNED(8192) void but4b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v7 = reverse(minus(v5, v3));
    real2 v13 = plus(v3, v5);
    real2 v2 = load(in, 0 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    store(out, 0 << outShift, plus(v12, v13));
    real2 v26 = minus(v12, v13);
    store(out, 2 << outShift, ctimesminusplus(v26, tbl[0 + tbloffset], ctimes(reverse(v26), tbl[1 + tbloffset])));
    real2 v11 = minusplus(uminus(v7), v8);
    real2 v9 = minusplus(v7, v8);
    store(out, 1 << outShift, ctimesminusplus(reverse(v9), tbl[2 + tbloffset], ctimes(v9, tbl[3 + tbloffset])));
    store(out, 3 << outShift, ctimesminusplus(reverse(v11), tbl[4 + tbloffset], ctimes(v11, tbl[5 + tbloffset])));
  }
}

// 4-input kernel, scatter variant: results are written with
// scatter(out, j, 4, ...) and the table is read with load(). The body
// continues on the next line.
ALIGNED(8192) void tbut4f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int
tbloffset = K * i0;
    // (Remainder of the 4-input scatter kernel that begins on the preceding
    // line; table values are read at j * VECWIDTH + tbloffset, j = 0..5.)
    real2 v5 = load(in, 3 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v7 = reverse(minus(v3, v5));
    real2 v13 = plus(v3, v5);
    real2 v2 = load(in, 0 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    scatter(out, 0, 4, plus(v12, v13));
    real2 v26 = minus(v12, v13);
    scatter(out, 2, 4, timesminusplus(v26, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v26), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v11 = minusplus(uminus(v7), v8);
    real2 v9 = minusplus(v7, v8);
    scatter(out, 1, 4, timesminusplus(reverse(v9), load(tbl, 2 * VECWIDTH + tbloffset), times(v9, load(tbl, 3 * VECWIDTH + tbloffset))));
    scatter(out, 3, 4, timesminusplus(reverse(v11), load(tbl, 4 * VECWIDTH + tbloffset), times(v11, load(tbl, 5 * VECWIDTH + tbloffset))));
  }
}

// Counterpart of tbut4f with the 'b' suffix: four inputs at offsets
// j << inShift of in0 + 2*i0, results written with scatter(out, j, 4, ...)
// to out0 + q[i], table read at j * VECWIDTH + tbloffset with
// tbloffset = K * i0. The only difference from tbut4f is the operand order
// of the difference feeding v7 (v5 - v3 here).
// NOTE(review): 'f'/'b' presumably mean forward/backward - confirm.
ALIGNED(8192) void tbut4b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    real2 v5 = load(in, 3 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v7 = reverse(minus(v5, v3));
    real2 v13 = plus(v3, v5);
    real2 v2 = load(in, 0 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v8 = minus(v4, v2);
    real2 v12 = plus(v2, v4);
    scatter(out, 0, 4, plus(v12, v13));
    real2 v26 = minus(v12, v13);
    scatter(out, 2, 4, timesminusplus(v26, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v26), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v11 = minusplus(uminus(v7), v8);
    real2 v9 = minusplus(v7, v8);
    scatter(out, 1, 4, timesminusplus(reverse(v9), load(tbl, 2 * VECWIDTH + tbloffset), times(v9, load(tbl, 3 * VECWIDTH + tbloffset))));
    scatter(out, 3, 4, timesminusplus(reverse(v11), load(tbl, 4 * VECWIDTH + tbloffset), times(v11, load(tbl, 5 * VECWIDTH + tbloffset))));
  }
}

// The 8-input kernels are compiled only when MAXBUTWIDTH is at least 3.
#if MAXBUTWIDTH >= 3
// 8-input kernel without a per-call table. Iteration i reads eight inputs at
// offsets j << shift (j = 0..7) from in0 + 2*i0 and writes eight outputs at
// the same offsets of out0 + 2*i0. The odd outputs are combined with the
// constants ctbl[0] and ctbl[1], which are defined outside this template chunk.
ALIGNED(8192) void dft8f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    real2 v9 = load(in, 7 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v33 = plus(v5, v9);
    real2 v27 = reverse(minus(v5, v9));
    real2 v3 = load(in, 1 << shift);
    real2 v7 = load(in, 5 << shift);
    real2 v32 = plus(v3, v7);
    real2 v28 = minus(v7, v3);
    real2 v45 = reverse(minus(v32, v33));
    real2 v51 = plus(v32, v33);
    real2 v29 = minusplus(v27, v28);
    real2 v31 = minusplus(uminus(v27), v28);
    real2 v43 = ctimesminusplus(reverse(v31), ctbl[1], ctimes(v31, ctbl[0]));
    real2 v6 = load(in, 4 << shift);
    real2 v2 = load(in, 0 << shift);
    real2 v12 = minus(v6, v2);
    real2 v16 = plus(v2, v6);
    real2 v8 = load(in, 6 << shift);
    real2 v4 = load(in, 2 << shift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v4, v8));
    real2 v46 = minus(v17, v16);
    store(out, 2 << shift, minus(uplusminus(v45), v46));
    store(out, 6 << shift, minus(uminusplus(v45), v46));
    real2 v50 = plus(v16, v17);
    store(out, 4 << shift, minus(v50, v51));
    store(out, 0 << shift, plus(v50, v51));
    real2 v25 = minus(uminusplus(v11), v12);
    store(out, 3 << shift, plus(v25, v43));
    store(out, 7 << shift, minus(v25, v43));
    real2 v21 = minus(uplusminus(v11), v12);
    real2 v38 = ctimesminusplus(reverse(v29), ctbl[1], ctimes(v29, ctbl[1]));
    store(out, 1 << shift, plus(v21, v38));
    store(out, 5 << shift, minus(v21, v38));
  }
}

// Counterpart of dft8f with the 'b' suffix; the body continues on the next line.
// NOTE(review): 'f'/'b' presumably mean forward/backward - confirm.
ALIGNED(8192) void dft8b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    real2 v8 = load(in, 6 << shift);
    real2 v4 = load(in, 2 << shift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v8, v4));
    real2 v2 =
load(in, 0 << shift);
    // (Remainder of the 8-input kernel without a per-call table that begins
    // on the preceding line; the odd outputs use the constants ctbl[0]/ctbl[1].)
    real2 v6 = load(in, 4 << shift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v50 = plus(v16, v17);
    real2 v46 = minus(v17, v16);
    real2 v21 = minus(uplusminus(v11), v12);
    real2 v25 = minus(uminusplus(v11), v12);
    real2 v3 = load(in, 1 << shift);
    real2 v7 = load(in, 5 << shift);
    real2 v28 = minus(v7, v3);
    real2 v32 = plus(v3, v7);
    real2 v5 = load(in, 3 << shift);
    real2 v9 = load(in, 7 << shift);
    real2 v33 = plus(v5, v9);
    real2 v27 = reverse(minus(v9, v5));
    real2 v45 = reverse(minus(v33, v32));
    real2 v51 = plus(v32, v33);
    store(out, 0 << shift, plus(v50, v51));
    store(out, 4 << shift, minus(v50, v51));
    store(out, 2 << shift, minus(uplusminus(v45), v46));
    store(out, 6 << shift, minus(uminusplus(v45), v46));
    real2 v31 = minusplus(uminus(v27), v28);
    real2 v29 = minusplus(v27, v28);
    real2 v43 = ctimesminusplus(reverse(v31), ctbl[0], ctimes(v31, ctbl[0]));
    store(out, 7 << shift, minus(v25, v43));
    store(out, 3 << shift, plus(v25, v43));
    real2 v39 = ctimesminusplus(reverse(v29), ctbl[0], ctimes(v29, ctbl[1]));
    store(out, 1 << shift, plus(v21, v39));
    store(out, 5 << shift, minus(v21, v39));
  }
}

// 8-input kernel with table. Iteration i reads eight inputs at offsets
// j << inShift (j = 0..7) from in0 + 2*i0 and writes eight outputs at offsets
// j << outShift of out0 + q[i]. Table entries are taken from
// tbl[tbloffset + n] with tbloffset = K * (i0 >> outShift).
// The body continues on the next line.
ALIGNED(8192) void but8f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v37 = plus(v5, v9);
    real2 v31 = reverse(minus(v5, v9));
    real2 v7 = load(in, 5 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v36 = plus(v3, v7);
    real2 v32 = minus(v7, v3);
    real2 v57 = plus(v36, v37);
    real2 v51 = reverse(minus(v36, v37));
    real2 v35 = minusplus(uminus(v31), v32);
    real2 v33 = minusplus(v31, v32);
    real2 v43 = ctimesminusplus(reverse(v33), tbl[6 + tbloffset],
ctimes(v33, tbl[7 + tbloffset]));
    // (Remainder of the 8-input table kernel that begins on the preceding
    // line. Table entries 0..13 at tbloffset are used; entries 0/1 are shared
    // by outputs 4, 5 and 7.)
    real2 v6 = load(in, 4 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v8 = load(in, 6 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v4, v8));
    real2 v52 = minus(v17, v16);
    real2 v56 = plus(v16, v17);
    store(out, 0 << outShift, plus(v56, v57));
    real2 v70 = minus(v56, v57);
    store(out, 4 << outShift, ctimesminusplus(v70, tbl[0 + tbloffset], ctimes(reverse(v70), tbl[1 + tbloffset])));
    real2 v53 = minusplus(v51, v52);
    store(out, 2 << outShift, ctimesminusplus(reverse(v53), tbl[10 + tbloffset], ctimes(v53, tbl[11 + tbloffset])));
    real2 v55 = minusplus(uminus(v51), v52);
    store(out, 6 << outShift, ctimesminusplus(reverse(v55), tbl[12 + tbloffset], ctimes(v55, tbl[13 + tbloffset])));
    real2 v15 = minusplus(uminus(v11), v12);
    real2 v13 = minusplus(v11, v12);
    real2 v23 = ctimesminusplus(reverse(v13), tbl[2 + tbloffset], ctimes(v13, tbl[3 + tbloffset]));
    store(out, 1 << outShift, plus(v23, v43));
    real2 v78 = minus(v23, v43);
    store(out, 5 << outShift, ctimesminusplus(v78, tbl[0 + tbloffset], ctimes(reverse(v78), tbl[1 + tbloffset])));
    real2 v49 = ctimesminusplus(reverse(v35), tbl[8 + tbloffset], ctimes(v35, tbl[9 + tbloffset]));
    real2 v29 = ctimesminusplus(reverse(v15), tbl[4 + tbloffset], ctimes(v15, tbl[5 + tbloffset]));
    store(out, 3 << outShift, plus(v29, v49));
    real2 v84 = minus(v29, v49);
    store(out, 7 << outShift, ctimesminusplus(v84, tbl[0 + tbloffset], ctimes(reverse(v84), tbl[1 + tbloffset])));
  }
}

// Counterpart of but8f with the 'b' suffix: eight inputs at offsets
// j << inShift of in0 + 2*i0, eight outputs at offsets j << outShift of
// out0 + q[i], table entries from tbl[tbloffset + n] with
// tbloffset = K * (i0 >> outShift). The body continues on the next line.
// NOTE(review): 'f'/'b' presumably mean forward/backward - confirm.
ALIGNED(8192) void but8b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    real2 v9 = load(in, 7 << inShift);
    // (Remainder of the 8-input table kernel with the 'b' suffix that begins
    // on the preceding line. Table entries 0..13 at tbloffset are used;
    // entries 0/1 are shared by outputs 4, 5 and 7.)
    real2 v5 = load(in, 3 << inShift);
    real2 v37 = plus(v5, v9);
    real2 v31 = reverse(minus(v9, v5));
    real2 v7 = load(in, 5 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v36 = plus(v3, v7);
    real2 v32 = minus(v7, v3);
    real2 v57 = plus(v36, v37);
    real2 v51 = reverse(minus(v37, v36));
    real2 v35 = minusplus(uminus(v31), v32);
    real2 v33 = minusplus(v31, v32);
    real2 v43 = ctimesminusplus(reverse(v33), tbl[6 + tbloffset], ctimes(v33, tbl[7 + tbloffset]));
    real2 v6 = load(in, 4 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v8 = load(in, 6 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v8, v4));
    real2 v52 = minus(v17, v16);
    real2 v56 = plus(v16, v17);
    store(out, 0 << outShift, plus(v56, v57));
    real2 v70 = minus(v56, v57);
    store(out, 4 << outShift, ctimesminusplus(v70, tbl[0 + tbloffset], ctimes(reverse(v70), tbl[1 + tbloffset])));
    real2 v53 = minusplus(v51, v52);
    store(out, 2 << outShift, ctimesminusplus(reverse(v53), tbl[10 + tbloffset], ctimes(v53, tbl[11 + tbloffset])));
    real2 v55 = minusplus(uminus(v51), v52);
    store(out, 6 << outShift, ctimesminusplus(reverse(v55), tbl[12 + tbloffset], ctimes(v55, tbl[13 + tbloffset])));
    real2 v15 = minusplus(uminus(v11), v12);
    real2 v13 = minusplus(v11, v12);
    real2 v23 = ctimesminusplus(reverse(v13), tbl[2 + tbloffset], ctimes(v13, tbl[3 + tbloffset]));
    store(out, 1 << outShift, plus(v23, v43));
    real2 v78 = minus(v23, v43);
    store(out, 5 << outShift, ctimesminusplus(v78, tbl[0 + tbloffset], ctimes(reverse(v78), tbl[1 + tbloffset])));
    real2 v49 = ctimesminusplus(reverse(v35), tbl[8 + tbloffset], ctimes(v35, tbl[9 + tbloffset]));
    real2 v29 = ctimesminusplus(reverse(v15), tbl[4 + tbloffset], ctimes(v15, tbl[5 + tbloffset]));
    store(out, 3 << outShift, plus(v29, v49));
    real2 v84 = minus(v29, v49);
    store(out, 7 << outShift, ctimesminusplus(v84, tbl[0 + tbloffset], ctimes(reverse(v84), tbl[1 + tbloffset])));
  }
}

// Alignment attribute of the kernel whose declaration starts on the next line.
ALIGNED(8192)
/*
 * tbut8f_%CONFIG%_%ISA%: unrolled 8-input / 8-output kernel ('f' variant)
 * that writes its results with scatter() and reads its coefficients from
 * tbl with load().
 *
 * The loop runs k = 1 << (inShift - LOG2VECWIDTH) iterations under
 * "#pragma omp parallel for". Iteration i, with i0 = i << LOG2VECWIDTH:
 *   - reads eight real2 values via load(in, j << inShift), j = 0..7,
 *     where in = in0 + i0*2;
 *   - combines them with plus/minus/minusplus/uminus/reverse and with
 *     times/timesminusplus against load(tbl, n * VECWIDTH + tbloffset),
 *     n = 0..13, where tbloffset = K * i0;
 *   - writes eight results via scatter(out, j, 8, ...), j = 0..7,
 *     where out = out0 + q[i].
 *
 * Parameters:
 *   out0     base pointer of the output buffer
 *   q        per-iteration offsets; q[i] is added to out0
 *   in0      base pointer of the input buffer
 *   inShift  shift applied to the input slot index; also sets the trip count
 *   tbl      coefficient table (presumably twiddle factors -- TODO confirm)
 *   K        multiplier used to form the per-iteration table offset
 *
 * NOTE(review): real2 and the helpers (load, scatter, times, ...) are defined
 * outside this part of the file; their exact semantics are not visible here.
 * NOTE(review): assumes inShift >= LOG2VECWIDTH -- confirm at the call sites.
 * NOTE(review): 'f' presumably denotes the forward transform -- confirm.
 */
void tbut8f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    /* Output position of this iteration is taken from the q[] table. */
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    /* Base offset into tbl for this iteration. */
    const int tbloffset = K * i0;
    real2 v9 = load(in, 7 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v37 = plus(v5, v9);
    real2 v31 = reverse(minus(v5, v9));
    real2 v7 = load(in, 5 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v36 = plus(v3, v7);
    real2 v32 = minus(v7, v3);
    real2 v57 = plus(v36, v37);
    real2 v51 = reverse(minus(v36, v37));
    real2 v35 = minusplus(uminus(v31), v32);
    real2 v33 = minusplus(v31, v32);
    real2 v43 = timesminusplus(reverse(v33), load(tbl, 6 * VECWIDTH + tbloffset), times(v33, load(tbl, 7 * VECWIDTH + tbloffset)));
    real2 v6 = load(in, 4 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v8 = load(in, 6 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v4, v8));
    real2 v52 = minus(v17, v16);
    real2 v56 = plus(v16, v17);
    scatter(out, 0, 8, plus(v56, v57));
    real2 v70 = minus(v56, v57);
    scatter(out, 4, 8, timesminusplus(v70, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v70), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v53 = minusplus(v51, v52);
    scatter(out, 2, 8, timesminusplus(reverse(v53), load(tbl, 10 * VECWIDTH + tbloffset), times(v53, load(tbl, 11 * VECWIDTH + tbloffset))));
    real2 v55 = minusplus(uminus(v51), v52);
    scatter(out, 6, 8, timesminusplus(reverse(v55), load(tbl, 12 * VECWIDTH + tbloffset), times(v55, load(tbl, 13 * VECWIDTH + tbloffset))));
    real2 v15 = minusplus(uminus(v11), v12);
    real2 v13 = minusplus(v11, v12);
    real2 v23 = timesminusplus(reverse(v13), load(tbl, 2 * VECWIDTH + tbloffset), times(v13, load(tbl, 3 * VECWIDTH + tbloffset)));
    scatter(out, 1, 8, plus(v23, v43));
    real2 v78 = minus(v23, v43);
    scatter(out, 5, 8, timesminusplus(v78, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v78), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v49 = timesminusplus(reverse(v35), load(tbl, 8 * VECWIDTH + tbloffset), times(v35, load(tbl, 9 * VECWIDTH + tbloffset)));
    real2 v29 = timesminusplus(reverse(v15), load(tbl, 4 * VECWIDTH + tbloffset), times(v15, load(tbl, 5 * VECWIDTH + tbloffset)));
    scatter(out, 3, 8, plus(v29, v49));
    real2 v84 = minus(v29, v49);
    scatter(out, 7, 8, timesminusplus(v84, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v84), load(tbl, 1 * VECWIDTH + tbloffset))));
  }
}

/*
 * tbut8b_%CONFIG%_%ISA%: 'b' counterpart of tbut8f_%CONFIG%_%ISA%, with the
 * same signature, loop structure, table offsets (n = 0..13) and
 * scatter(out, j, 8, ...) outputs (j = 0..7).
 *
 * The visible difference from the 'f' variant appears to be the swapped
 * operand order of each minus() nested directly inside reverse()
 * (v31, v51 and v11).
 *
 * Parameters:
 *   out0     base pointer of the output buffer
 *   q        per-iteration offsets; q[i] is added to out0
 *   in0      base pointer of the input buffer
 *   inShift  shift applied to the input slot index; also sets the trip count
 *   tbl      coefficient table (presumably twiddle factors -- TODO confirm)
 *   K        multiplier used to form the per-iteration table offset
 *
 * NOTE(review): 'b' presumably denotes the backward transform -- confirm.
 */
ALIGNED(8192)
void tbut8b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    real2 v9 = load(in, 7 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v37 = plus(v5, v9);
    real2 v31 = reverse(minus(v9, v5));
    real2 v7 = load(in, 5 << inShift);
    real2 v3 = load(in, 1 << inShift);
    real2 v36 = plus(v3, v7);
    real2 v32 = minus(v7, v3);
    real2 v57 = plus(v36, v37);
    real2 v51 = reverse(minus(v37, v36));
    real2 v35 = minusplus(uminus(v31), v32);
    real2 v33 = minusplus(v31, v32);
    real2 v43 = timesminusplus(reverse(v33), load(tbl, 6 * VECWIDTH + tbloffset), times(v33, load(tbl, 7 * VECWIDTH + tbloffset)));
    real2 v6 = load(in, 4 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v16 = plus(v2, v6);
    real2 v12 = minus(v6, v2);
    real2 v8 = load(in, 6 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v17 = plus(v4, v8);
    real2 v11 = reverse(minus(v8, v4));
    real2 v52 = minus(v17, v16);
    real2 v56 = plus(v16, v17);
    scatter(out, 0, 8, plus(v56, v57));
    real2 v70 = minus(v56, v57);
    scatter(out, 4, 8, timesminusplus(v70, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v70), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v53 = minusplus(v51, v52);
    scatter(out, 2, 8, timesminusplus(reverse(v53), load(tbl, 10 * VECWIDTH + tbloffset), times(v53, load(tbl, 11 * VECWIDTH + tbloffset))));
    real2 v55 = minusplus(uminus(v51), v52);
    scatter(out, 6, 8, timesminusplus(reverse(v55), load(tbl, 12 * VECWIDTH + tbloffset), times(v55, load(tbl, 13 * VECWIDTH + tbloffset))));
    real2 v15 = minusplus(uminus(v11), v12);
    real2 v13 = minusplus(v11, v12);
    real2 v23 = timesminusplus(reverse(v13), load(tbl, 2 * VECWIDTH + tbloffset), times(v13, load(tbl, 3 * VECWIDTH + tbloffset)));
    scatter(out, 1, 8, plus(v23, v43));
    real2 v78 = minus(v23, v43);
    scatter(out, 5, 8, timesminusplus(v78, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v78), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v49 = timesminusplus(reverse(v35), load(tbl, 8 * VECWIDTH + tbloffset), times(v35, load(tbl, 9 * VECWIDTH + tbloffset)));
    real2 v29 = timesminusplus(reverse(v15), load(tbl, 4 * VECWIDTH + tbloffset), times(v15, load(tbl, 5 * VECWIDTH + tbloffset)));
    scatter(out, 3, 8, plus(v29, v49));
    real2 v84 = minus(v29, v49);
    scatter(out, 7, 8, timesminusplus(v84, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v84), load(tbl, 1 * VECWIDTH + tbloffset))));
  }
}
#endif

/* The 16-point kernels below are compiled only when MAXBUTWIDTH >= 4. */
#if MAXBUTWIDTH >= 4
/*
 * dft16f_%CONFIG%_%ISA%: unrolled 16-input / 16-output kernel ('f' variant)
 * that uses the fixed coefficient array ctbl[] instead of a caller-supplied
 * table.
 *
 * The loop runs k = 1 << (shift - LOG2VECWIDTH) iterations under
 * "#pragma omp parallel for". Iteration i, with i0 = i << LOG2VECWIDTH,
 * reads sixteen real2 values via load(in, j << shift) and writes sixteen
 * results via store(out, j << shift, ...), j = 0..15, where
 * in = in0 + i0*2 and out = out0 + i0*2. Coefficients ctbl[0]..ctbl[5]
 * are used.
 *
 * Parameters:
 *   out0   base pointer of the output buffer
 *   in0    base pointer of the input buffer
 *   shift  shift applied to both input and output slot indices; also sets
 *          the trip count
 *
 * NOTE(review): ctbl, real2 and the helpers are defined outside this part of
 * the file; their exact semantics are not visible here.
 * NOTE(review): assumes shift >= LOG2VECWIDTH -- confirm at the call sites.
 * NOTE(review): 'f' presumably denotes the forward transform -- confirm.
 */
ALIGNED(8192)
void dft16f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    real2 v11 = load(in, 9 << shift);
    real2 v3 = load(in, 1 << shift);
    real2 v40 = plus(v3, v11);
    real2 v36 = minus(v11, v3);
    real2 v15 = load(in, 13 << shift);
    real2 v7 = load(in, 5 << shift);
    real2 v35 = reverse(minus(v7, v15));
    real2 v41 = plus(v7, v15);
    real2 v106 = minus(v41, v40);
    real2 v110 = plus(v40, v41);
    real2 v37 = minusplus(v35, v36);
    real2 v39 = minusplus(uminus(v35), v36);
    real2 v51 = ctimesminusplus(reverse(v39), ctbl[5],
ctimes(v39, ctbl[3]));
    /*
     * dft16f_%CONFIG%_%ISA% continues here: the remaining loads and
     * combinations, followed by all sixteen store(out, j << shift, ...)
     * calls of one loop iteration.
     */
    real2 v47 = ctimesminusplus(reverse(v37), ctbl[3], ctimes(v37, ctbl[5]));
    real2 v13 = load(in, 11 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v72 = minus(v13, v5);
    real2 v76 = plus(v5, v13);
    real2 v17 = load(in, 15 << shift);
    real2 v9 = load(in, 7 << shift);
    real2 v77 = plus(v9, v17);
    real2 v71 = reverse(minus(v9, v17));
    real2 v105 = reverse(minus(v76, v77));
    real2 v111 = plus(v76, v77);
    real2 v107 = minusplus(v105, v106);
    real2 v109 = minusplus(uminus(v105), v106);
    real2 v121 = reverse(minus(v110, v111));
    real2 v127 = plus(v110, v111);
    real2 v119 = ctimesminusplus(reverse(v109), ctbl[1], ctimes(v109, ctbl[0]));
    real2 v115 = ctimesminusplus(reverse(v107), ctbl[1], ctimes(v107, ctbl[1]));
    real2 v8 = load(in, 6 << shift);
    real2 v16 = load(in, 14 << shift);
    real2 v53 = reverse(minus(v8, v16));
    real2 v59 = plus(v8, v16);
    real2 v4 = load(in, 2 << shift);
    real2 v12 = load(in, 10 << shift);
    real2 v54 = minus(v12, v4);
    real2 v58 = plus(v4, v12);
    real2 v95 = plus(v58, v59);
    real2 v89 = reverse(minus(v58, v59));
    real2 v2 = load(in, 0 << shift);
    real2 v10 = load(in, 8 << shift);
    real2 v24 = plus(v2, v10);
    real2 v20 = minus(v10, v2);
    real2 v6 = load(in, 4 << shift);
    real2 v14 = load(in, 12 << shift);
    real2 v19 = reverse(minus(v6, v14));
    real2 v25 = plus(v6, v14);
    real2 v94 = plus(v24, v25);
    real2 v90 = minus(v25, v24);
    real2 v103 = minus(uminusplus(v89), v90);
    real2 v99 = minus(uplusminus(v89), v90);
    store(out, 2 << shift, plus(v99, v115));
    store(out, 10 << shift, minus(v99, v115));
    store(out, 6 << shift, plus(v103, v119));
    store(out, 14 << shift, minus(v103, v119));
    real2 v122 = minus(v95, v94);
    store(out, 12 << shift, minus(uminusplus(v121), v122));
    store(out, 4 << shift, minus(uplusminus(v121), v122));
    real2 v126 = plus(v94, v95);
    store(out, 8 << shift, minus(v126, v127));
    store(out, 0 << shift, plus(v126, v127));
    real2 v57 = minusplus(uminus(v53), v54);
    real2 v55 = minusplus(v53, v54);
    real2 v64 = ctimesminusplus(reverse(v55), ctbl[1], ctimes(v55, ctbl[1]));
    real2 v75 = minusplus(uminus(v71), v72);
    real2 v73 = minusplus(v71, v72);
    real2 v81 = ctimesminusplus(reverse(v73), ctbl[5], ctimes(v73, ctbl[3]));
    real2 v29 = minus(uplusminus(v19), v20);
    real2 v33 = minus(uminusplus(v19), v20);
    real2 v151 = plus(v29, v64);
    real2 v147 = minus(v64, v29);
    real2 v152 = plus(v47, v81);
    real2 v146 = reverse(minus(v47, v81));
    store(out, 13 << shift, minus(uminusplus(v146), v147));
    store(out, 5 << shift, minus(uplusminus(v146), v147));
    store(out, 9 << shift, minus(v151, v152));
    store(out, 1 << shift, plus(v151, v152));
    real2 v69 = ctimesminusplus(reverse(v57), ctbl[1], ctimes(v57, ctbl[0]));
    real2 v87 = ctimesminusplus(reverse(v75), ctbl[4], ctimes(v75, ctbl[2]));
    real2 v171 = plus(v51, v87);
    real2 v165 = reverse(minus(v51, v87));
    real2 v170 = plus(v33, v69);
    real2 v166 = minus(v69, v33);
    store(out, 7 << shift, minus(uplusminus(v165), v166));
    store(out, 15 << shift, minus(uminusplus(v165), v166));
    store(out, 11 << shift, minus(v170, v171));
    store(out, 3 << shift, plus(v170, v171));
  }
}

/*
 * dft16b_%CONFIG%_%ISA%: unrolled 16-input / 16-output kernel ('b' variant)
 * that uses the fixed coefficient array ctbl[] instead of a caller-supplied
 * table.
 *
 * The loop runs k = 1 << (shift - LOG2VECWIDTH) iterations under
 * "#pragma omp parallel for". Iteration i, with i0 = i << LOG2VECWIDTH,
 * reads sixteen real2 values via load(in, j << shift) and writes sixteen
 * results via store(out, j << shift, ...), j = 0..15, where
 * in = in0 + i0*2 and out = out0 + i0*2. Coefficients ctbl[0]..ctbl[5]
 * are used.
 *
 * Parameters:
 *   out0   base pointer of the output buffer
 *   in0    base pointer of the input buffer
 *   shift  shift applied to both input and output slot indices; also sets
 *          the trip count
 *
 * NOTE(review): ctbl, real2 and the helpers are defined outside this part of
 * the file; their exact semantics are not visible here.
 * NOTE(review): assumes shift >= LOG2VECWIDTH -- confirm at the call sites.
 * NOTE(review): 'b' presumably denotes the backward transform -- confirm.
 */
ALIGNED(8192)
void dft16b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) {
  const int k = 1 << (shift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + i0*2;
    const real *in = in0 + i0*2;
    real2 v9 = load(in, 7 << shift);
    real2 v17 = load(in, 15 << shift);
    real2 v79 = plus(v9, v17);
    real2 v73 = reverse(minus(v17, v9));
    real2 v13 = load(in, 11 << shift);
    real2 v5 = load(in, 3 << shift);
    real2 v78 = plus(v5, v13);
    real2 v74 = minus(v13, v5);
    real2 v105 = reverse(minus(v79, v78));
    real2 v111 = plus(v78, v79);
    real2 v75 = minusplus(v73, v74);
    real2 v77 = minusplus(uminus(v73), v74);
    real2 v83 = ctimesminusplus(reverse(v75), ctbl[2], ctimes(v75, ctbl[3]));
    real2 v7 = load(in, 5 << shift);
    real2 v15 = load(in, 13 << shift);
    real2 v41 = plus(v7, v15);
    real2 v35 = reverse(minus(v15, v7));
    real2 v11 = load(in, 9 << shift);
    real2 v3 = load(in, 1 <<
shift);
    /*
     * dft16b_%CONFIG%_%ISA% continues here: the remaining loads and
     * combinations, followed by all sixteen store(out, j << shift, ...)
     * calls of one loop iteration.
     */
    real2 v40 = plus(v3, v11);
    real2 v36 = minus(v11, v3);
    real2 v110 = plus(v40, v41);
    real2 v106 = minus(v41, v40);
    real2 v121 = reverse(minus(v111, v110));
    real2 v127 = plus(v110, v111);
    real2 v109 = minusplus(uminus(v105), v106);
    real2 v107 = minusplus(v105, v106);
    real2 v119 = ctimesminusplus(reverse(v109), ctbl[0], ctimes(v109, ctbl[0]));
    real2 v115 = ctimesminusplus(reverse(v107), ctbl[0], ctimes(v107, ctbl[1]));
    real2 v16 = load(in, 14 << shift);
    real2 v8 = load(in, 6 << shift);
    real2 v55 = reverse(minus(v16, v8));
    real2 v61 = plus(v8, v16);
    real2 v12 = load(in, 10 << shift);
    real2 v4 = load(in, 2 << shift);
    real2 v56 = minus(v12, v4);
    real2 v60 = plus(v4, v12);
    real2 v89 = reverse(minus(v61, v60));
    real2 v95 = plus(v60, v61);
    real2 v14 = load(in, 12 << shift);
    real2 v6 = load(in, 4 << shift);
    real2 v19 = reverse(minus(v14, v6));
    real2 v25 = plus(v6, v14);
    real2 v2 = load(in, 0 << shift);
    real2 v10 = load(in, 8 << shift);
    real2 v24 = plus(v2, v10);
    real2 v20 = minus(v10, v2);
    real2 v90 = minus(v25, v24);
    real2 v94 = plus(v24, v25);
    real2 v103 = minus(uminusplus(v89), v90);
    store(out, 6 << shift, plus(v103, v119));
    store(out, 14 << shift, minus(v103, v119));
    real2 v99 = minus(uplusminus(v89), v90);
    store(out, 10 << shift, minus(v99, v115));
    store(out, 2 << shift, plus(v99, v115));
    real2 v126 = plus(v94, v95);
    store(out, 8 << shift, minus(v126, v127));
    store(out, 0 << shift, plus(v126, v127));
    real2 v122 = minus(v95, v94);
    store(out, 12 << shift, minus(uminusplus(v121), v122));
    store(out, 4 << shift, minus(uplusminus(v121), v122));
    real2 v33 = minus(uminusplus(v19), v20);
    real2 v29 = minus(uplusminus(v19), v20);
    real2 v59 = minusplus(uminus(v55), v56);
    real2 v57 = minusplus(v55, v56);
    real2 v67 = ctimesminusplus(reverse(v57), ctbl[0], ctimes(v57, ctbl[1]));
    real2 v39 = minusplus(uminus(v35), v36);
    real2 v37 = minusplus(v35, v36);
    real2 v47 = ctimesminusplus(reverse(v37), ctbl[4], ctimes(v37, ctbl[5]));
    real2 v146 = reverse(minus(v83, v47));
    real2 v152 = plus(v47, v83);
    real2 v147 = minus(v67, v29);
    real2 v151 = plus(v29, v67);
    store(out, 9 << shift, minus(v151, v152));
    store(out, 1 << shift, plus(v151, v152));
    store(out, 5 << shift, minus(uplusminus(v146), v147));
    store(out, 13 << shift, minus(uminusplus(v146), v147));
    real2 v53 = ctimesminusplus(reverse(v39), ctbl[2], ctimes(v39, ctbl[3]));
    real2 v71 = ctimesminusplus(reverse(v59), ctbl[0], ctimes(v59, ctbl[0]));
    real2 v166 = minus(v71, v33);
    real2 v170 = plus(v33, v71);
    real2 v87 = ctimesminusplus(reverse(v77), ctbl[3], ctimes(v77, ctbl[2]));
    real2 v165 = reverse(minus(v87, v53));
    store(out, 15 << shift, minus(uminusplus(v165), v166));
    store(out, 7 << shift, minus(uplusminus(v165), v166));
    real2 v171 = plus(v53, v87);
    store(out, 3 << shift, plus(v170, v171));
    store(out, 11 << shift, minus(v170, v171));
  }
}

/*
 * but16f_%CONFIG%_%ISA%: unrolled 16-input / 16-output kernel ('f' variant)
 * whose coefficients come from the caller-supplied table tbl.
 *
 * The loop runs k = 1 << (inShift - LOG2VECWIDTH) iterations under
 * "#pragma omp parallel for". Iteration i, with i0 = i << LOG2VECWIDTH:
 *   - reads sixteen real2 values via load(in, j << inShift), j = 0..15,
 *     where in = in0 + i0*2;
 *   - combines them with plus/minus/minusplus/uminus/reverse and with
 *     ctimes/ctimesminusplus against tbl[n + tbloffset], n = 0..37,
 *     where tbloffset = K * (i0 >> outShift);
 *   - writes sixteen results via store(out, j << outShift, ...), j = 0..15,
 *     where out = out0 + q[i].
 *
 * Parameters:
 *   out0      base pointer of the output buffer
 *   q         per-iteration offsets; q[i] is added to out0
 *   outShift  shift applied to the output slot index; also used for tbloffset
 *   in0       base pointer of the input buffer
 *   inShift   shift applied to the input slot index; also sets the trip count
 *   tbl       coefficient table (presumably twiddle factors -- TODO confirm)
 *   K         multiplier used to form the per-iteration table offset
 *
 * NOTE(review): real2 and the helpers are defined outside this part of the
 * file; their exact semantics are not visible here.
 * NOTE(review): assumes inShift >= LOG2VECWIDTH -- confirm at the call sites.
 * NOTE(review): 'f' presumably denotes the forward transform -- confirm.
 */
ALIGNED(8192)
void but16f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    /* Output position of this iteration is taken from the q[] table. */
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    /* Base index into tbl for this iteration. */
    const int tbloffset = K * (i0 >> outShift);
    real2 v15 = load(in, 13 << inShift);
    real2 v7 = load(in, 5 << inShift);
    real2 v45 = plus(v7, v15);
    real2 v39 = reverse(minus(v7, v15));
    real2 v3 = load(in, 1 << inShift);
    real2 v11 = load(in, 9 << inShift);
    real2 v40 = minus(v11, v3);
    real2 v44 = plus(v3, v11);
    real2 v124 = plus(v44, v45);
    real2 v120 = minus(v45, v44);
    real2 v41 = minusplus(v39, v40);
    real2 v43 = minusplus(uminus(v39), v40);
    real2 v57 = ctimesminusplus(reverse(v43), tbl[8 + tbloffset], ctimes(v43, tbl[9 + tbloffset]));
    real2 v13 = load(in, 11 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v84 = plus(v5, v13);
    real2 v80 = minus(v13, v5);
    real2 v17 = load(in, 15 << inShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v85 = plus(v9, v17);
    real2 v79 = reverse(minus(v9, v17));
    real2 v119 =
reverse(minus(v84, v85));
    /*
     * but16f_%CONFIG%_%ISA% continues here: the remaining loads and
     * combinations, followed by all sixteen store(out, j << outShift, ...)
     * calls of one loop iteration.
     */
    real2 v125 = plus(v84, v85);
    real2 v145 = plus(v124, v125);
    real2 v139 = reverse(minus(v124, v125));
    real2 v121 = minusplus(v119, v120);
    real2 v123 = minusplus(uminus(v119), v120);
    real2 v137 = ctimesminusplus(reverse(v123), tbl[24 + tbloffset], ctimes(v123, tbl[25 + tbloffset]));
    real2 v131 = ctimesminusplus(reverse(v121), tbl[22 + tbloffset], ctimes(v121, tbl[23 + tbloffset]));
    real2 v4 = load(in, 2 << inShift);
    real2 v12 = load(in, 10 << inShift);
    real2 v64 = plus(v4, v12);
    real2 v60 = minus(v12, v4);
    real2 v8 = load(in, 6 << inShift);
    real2 v16 = load(in, 14 << inShift);
    real2 v65 = plus(v8, v16);
    real2 v59 = reverse(minus(v8, v16));
    real2 v99 = reverse(minus(v64, v65));
    real2 v105 = plus(v64, v65);
    real2 v14 = load(in, 12 << inShift);
    real2 v6 = load(in, 4 << inShift);
    real2 v25 = plus(v6, v14);
    real2 v19 = reverse(minus(v6, v14));
    real2 v10 = load(in, 8 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v20 = minus(v10, v2);
    real2 v24 = plus(v2, v10);
    real2 v104 = plus(v24, v25);
    real2 v100 = minus(v25, v24);
    real2 v140 = minus(v105, v104);
    real2 v144 = plus(v104, v105);
    store(out, 0 << outShift, plus(v144, v145));
    real2 v158 = minus(v144, v145);
    store(out, 8 << outShift, ctimesminusplus(v158, tbl[0 + tbloffset], ctimes(reverse(v158), tbl[1 + tbloffset])));
    real2 v143 = minusplus(uminus(v139), v140);
    store(out, 12 << outShift, ctimesminusplus(reverse(v143), tbl[28 + tbloffset], ctimes(v143, tbl[29 + tbloffset])));
    real2 v141 = minusplus(v139, v140);
    store(out, 4 << outShift, ctimesminusplus(reverse(v141), tbl[26 + tbloffset], ctimes(v141, tbl[27 + tbloffset])));
    real2 v101 = minusplus(v99, v100);
    real2 v103 = minusplus(uminus(v99), v100);
    real2 v117 = ctimesminusplus(reverse(v103), tbl[20 + tbloffset], ctimes(v103, tbl[21 + tbloffset]));
    store(out, 6 << outShift, plus(v117, v137));
    real2 v172 = minus(v117, v137);
    store(out, 14 << outShift, ctimesminusplus(v172, tbl[0 + tbloffset], ctimes(reverse(v172), tbl[1 + tbloffset])));
    real2 v111 = ctimesminusplus(reverse(v101), tbl[18 + tbloffset], ctimes(v101, tbl[19 + tbloffset]));
    store(out, 2 << outShift, plus(v111, v131));
    real2 v166 = minus(v111, v131);
    store(out, 10 << outShift, ctimesminusplus(v166, tbl[0 + tbloffset], ctimes(reverse(v166), tbl[1 + tbloffset])));
    real2 v23 = minusplus(uminus(v19), v20);
    real2 v21 = minusplus(v19, v20);
    real2 v81 = minusplus(v79, v80);
    real2 v83 = minusplus(uminus(v79), v80);
    real2 v97 = ctimesminusplus(reverse(v83), tbl[16 + tbloffset], ctimes(v83, tbl[17 + tbloffset]));
    real2 v211 = plus(v57, v97);
    real2 v205 = reverse(minus(v57, v97));
    real2 v61 = minusplus(v59, v60);
    real2 v63 = minusplus(uminus(v59), v60);
    real2 v77 = ctimesminusplus(reverse(v63), tbl[12 + tbloffset], ctimes(v63, tbl[13 + tbloffset]));
    real2 v37 = ctimesminusplus(reverse(v23), tbl[4 + tbloffset], ctimes(v23, tbl[5 + tbloffset]));
    real2 v210 = plus(v37, v77);
    real2 v206 = minus(v77, v37);
    store(out, 3 << outShift, plus(v210, v211));
    real2 v224 = minus(v210, v211);
    store(out, 11 << outShift, ctimesminusplus(v224, tbl[0 + tbloffset], ctimes(reverse(v224), tbl[1 + tbloffset])));
    real2 v207 = minusplus(v205, v206);
    real2 v209 = minusplus(uminus(v205), v206);
    store(out, 15 << outShift, ctimesminusplus(reverse(v209), tbl[36 + tbloffset], ctimes(v209, tbl[37 + tbloffset])));
    store(out, 7 << outShift, ctimesminusplus(reverse(v207), tbl[34 + tbloffset], ctimes(v207, tbl[35 + tbloffset])));
    real2 v71 = ctimesminusplus(reverse(v61), tbl[10 + tbloffset], ctimes(v61, tbl[11 + tbloffset]));
    real2 v51 = ctimesminusplus(reverse(v41), tbl[6 + tbloffset], ctimes(v41, tbl[7 + tbloffset]));
    real2 v91 = ctimesminusplus(reverse(v81), tbl[14 + tbloffset], ctimes(v81, tbl[15 + tbloffset]));
    real2 v185 = plus(v51, v91);
    real2 v179 = reverse(minus(v51, v91));
    real2 v31 = ctimesminusplus(reverse(v21), tbl[2 + tbloffset], ctimes(v21, tbl[3 + tbloffset]));
    real2 v184 = plus(v31, v71);
    real2 v180 = minus(v71, v31);
    store(out, 1 << outShift, plus(v184, v185));
    real2 v198 = minus(v184, v185);
    store(out, 9 << outShift, ctimesminusplus(v198, tbl[0 + tbloffset], ctimes(reverse(v198), tbl[1 + tbloffset])));
    real2 v181 = minusplus(v179, v180);
    store(out, 5 << outShift, ctimesminusplus(reverse(v181), tbl[30 + tbloffset], ctimes(v181, tbl[31 + tbloffset])));
    real2 v183 = minusplus(uminus(v179), v180);
    store(out, 13 << outShift, ctimesminusplus(reverse(v183), tbl[32 + tbloffset], ctimes(v183, tbl[33 + tbloffset])));
  }
}

/*
 * but16b_%CONFIG%_%ISA%: 'b' counterpart of but16f_%CONFIG%_%ISA%, with the
 * same signature, loop structure, table indices (tbl[n + tbloffset],
 * n = 0..37, tbloffset = K * (i0 >> outShift)) and
 * store(out, j << outShift, ...) outputs (j = 0..15).
 *
 * The visible difference from the 'f' variant appears to be the swapped
 * operand order of each minus() nested directly inside reverse().
 *
 * Parameters:
 *   out0      base pointer of the output buffer
 *   q         per-iteration offsets; q[i] is added to out0
 *   outShift  shift applied to the output slot index; also used for tbloffset
 *   in0       base pointer of the input buffer
 *   inShift   shift applied to the input slot index; also sets the trip count
 *   tbl       coefficient table (presumably twiddle factors -- TODO confirm)
 *   K         multiplier used to form the per-iteration table offset
 *
 * NOTE(review): assumes inShift >= LOG2VECWIDTH -- confirm at the call sites.
 * NOTE(review): 'b' presumably denotes the backward transform -- confirm.
 */
ALIGNED(8192)
void but16b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * (i0 >> outShift);
    real2 v15 = load(in, 13 << inShift);
    real2 v7 = load(in, 5 << inShift);
    real2 v45 = plus(v7, v15);
    real2 v39 = reverse(minus(v15, v7));
    real2 v3 = load(in, 1 << inShift);
    real2 v11 = load(in, 9 << inShift);
    real2 v40 = minus(v11, v3);
    real2 v44 = plus(v3, v11);
    real2 v124 = plus(v44, v45);
    real2 v120 = minus(v45, v44);
    real2 v41 = minusplus(v39, v40);
    real2 v43 = minusplus(uminus(v39), v40);
    real2 v57 = ctimesminusplus(reverse(v43), tbl[8 + tbloffset], ctimes(v43, tbl[9 + tbloffset]));
    real2 v13 = load(in, 11 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v84 = plus(v5, v13);
    real2 v80 = minus(v13, v5);
    real2 v17 = load(in, 15 << inShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v85 = plus(v9, v17);
    real2 v79 = reverse(minus(v17, v9));
    real2 v119 = reverse(minus(v85, v84));
    real2 v125 = plus(v84, v85);
    real2 v145 = plus(v124, v125);
    real2 v139 = reverse(minus(v125, v124));
    real2 v121 = minusplus(v119, v120);
    real2 v123 = minusplus(uminus(v119), v120);
    real2 v137 = ctimesminusplus(reverse(v123), tbl[24 + tbloffset], ctimes(v123, tbl[25 + tbloffset]));
    real2 v131 = ctimesminusplus(reverse(v121), tbl[22 + tbloffset],
ctimes(v121, tbl[23 + tbloffset]));
    /*
     * but16b_%CONFIG%_%ISA% continues here: the remaining loads and
     * combinations, followed by all sixteen store(out, j << outShift, ...)
     * calls of one loop iteration.
     */
    real2 v4 = load(in, 2 << inShift);
    real2 v12 = load(in, 10 << inShift);
    real2 v64 = plus(v4, v12);
    real2 v60 = minus(v12, v4);
    real2 v8 = load(in, 6 << inShift);
    real2 v16 = load(in, 14 << inShift);
    real2 v65 = plus(v8, v16);
    real2 v59 = reverse(minus(v16, v8));
    real2 v99 = reverse(minus(v65, v64));
    real2 v105 = plus(v64, v65);
    real2 v14 = load(in, 12 << inShift);
    real2 v6 = load(in, 4 << inShift);
    real2 v25 = plus(v6, v14);
    real2 v19 = reverse(minus(v14, v6));
    real2 v10 = load(in, 8 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v20 = minus(v10, v2);
    real2 v24 = plus(v2, v10);
    real2 v104 = plus(v24, v25);
    real2 v100 = minus(v25, v24);
    real2 v140 = minus(v105, v104);
    real2 v144 = plus(v104, v105);
    store(out, 0 << outShift, plus(v144, v145));
    real2 v158 = minus(v144, v145);
    store(out, 8 << outShift, ctimesminusplus(v158, tbl[0 + tbloffset], ctimes(reverse(v158), tbl[1 + tbloffset])));
    real2 v143 = minusplus(uminus(v139), v140);
    store(out, 12 << outShift, ctimesminusplus(reverse(v143), tbl[28 + tbloffset], ctimes(v143, tbl[29 + tbloffset])));
    real2 v141 = minusplus(v139, v140);
    store(out, 4 << outShift, ctimesminusplus(reverse(v141), tbl[26 + tbloffset], ctimes(v141, tbl[27 + tbloffset])));
    real2 v101 = minusplus(v99, v100);
    real2 v103 = minusplus(uminus(v99), v100);
    real2 v117 = ctimesminusplus(reverse(v103), tbl[20 + tbloffset], ctimes(v103, tbl[21 + tbloffset]));
    store(out, 6 << outShift, plus(v117, v137));
    real2 v172 = minus(v117, v137);
    store(out, 14 << outShift, ctimesminusplus(v172, tbl[0 + tbloffset], ctimes(reverse(v172), tbl[1 + tbloffset])));
    real2 v111 = ctimesminusplus(reverse(v101), tbl[18 + tbloffset], ctimes(v101, tbl[19 + tbloffset]));
    store(out, 2 << outShift, plus(v111, v131));
    real2 v166 = minus(v111, v131);
    store(out, 10 << outShift, ctimesminusplus(v166, tbl[0 + tbloffset], ctimes(reverse(v166), tbl[1 + tbloffset])));
    real2 v23 = minusplus(uminus(v19), v20);
    real2 v21 = minusplus(v19, v20);
    real2 v81 = minusplus(v79, v80);
    real2 v83 = minusplus(uminus(v79), v80);
    real2 v97 = ctimesminusplus(reverse(v83), tbl[16 + tbloffset], ctimes(v83, tbl[17 + tbloffset]));
    real2 v211 = plus(v57, v97);
    real2 v205 = reverse(minus(v97, v57));
    real2 v61 = minusplus(v59, v60);
    real2 v63 = minusplus(uminus(v59), v60);
    real2 v77 = ctimesminusplus(reverse(v63), tbl[12 + tbloffset], ctimes(v63, tbl[13 + tbloffset]));
    real2 v37 = ctimesminusplus(reverse(v23), tbl[4 + tbloffset], ctimes(v23, tbl[5 + tbloffset]));
    real2 v210 = plus(v37, v77);
    real2 v206 = minus(v77, v37);
    store(out, 3 << outShift, plus(v210, v211));
    real2 v224 = minus(v210, v211);
    store(out, 11 << outShift, ctimesminusplus(v224, tbl[0 + tbloffset], ctimes(reverse(v224), tbl[1 + tbloffset])));
    real2 v207 = minusplus(v205, v206);
    real2 v209 = minusplus(uminus(v205), v206);
    store(out, 15 << outShift, ctimesminusplus(reverse(v209), tbl[36 + tbloffset], ctimes(v209, tbl[37 + tbloffset])));
    store(out, 7 << outShift, ctimesminusplus(reverse(v207), tbl[34 + tbloffset], ctimes(v207, tbl[35 + tbloffset])));
    real2 v71 = ctimesminusplus(reverse(v61), tbl[10 + tbloffset], ctimes(v61, tbl[11 + tbloffset]));
    real2 v51 = ctimesminusplus(reverse(v41), tbl[6 + tbloffset], ctimes(v41, tbl[7 + tbloffset]));
    real2 v91 = ctimesminusplus(reverse(v81), tbl[14 + tbloffset], ctimes(v81, tbl[15 + tbloffset]));
    real2 v185 = plus(v51, v91);
    real2 v179 = reverse(minus(v91, v51));
    real2 v31 = ctimesminusplus(reverse(v21), tbl[2 + tbloffset], ctimes(v21, tbl[3 + tbloffset]));
    real2 v184 = plus(v31, v71);
    real2 v180 = minus(v71, v31);
    store(out, 1 << outShift, plus(v184, v185));
    real2 v198 = minus(v184, v185);
    store(out, 9 << outShift, ctimesminusplus(v198, tbl[0 + tbloffset], ctimes(reverse(v198), tbl[1 + tbloffset])));
    real2 v181 = minusplus(v179, v180);
    store(out, 5 << outShift, ctimesminusplus(reverse(v181), tbl[30 + tbloffset], ctimes(v181, tbl[31 + tbloffset])));
    real2 v183 = minusplus(uminus(v179), v180);
    store(out, 13 << outShift, ctimesminusplus(reverse(v183), tbl[32 + tbloffset], ctimes(v183, tbl[33 + tbloffset])));
  }
}

/*
 * tbut16f_%CONFIG%_%ISA%: unrolled 16-input / 16-output kernel ('f' variant)
 * that writes its results with scatter() and reads its coefficients from
 * tbl with load().
 *
 * The loop runs k = 1 << (inShift - LOG2VECWIDTH) iterations under
 * "#pragma omp parallel for". Iteration i, with i0 = i << LOG2VECWIDTH:
 *   - reads sixteen real2 values via load(in, j << inShift), j = 0..15,
 *     where in = in0 + i0*2;
 *   - combines them with plus/minus/minusplus/uminus/reverse and with
 *     times/timesminusplus against load(tbl, n * VECWIDTH + tbloffset),
 *     n = 0..37, where tbloffset = K * i0;
 *   - writes sixteen results via scatter(out, j, 16, ...), j = 0..15,
 *     where out = out0 + q[i].
 *
 * Parameters:
 *   out0     base pointer of the output buffer
 *   q        per-iteration offsets; q[i] is added to out0
 *   in0      base pointer of the input buffer
 *   inShift  shift applied to the input slot index; also sets the trip count
 *   tbl      coefficient table (presumably twiddle factors -- TODO confirm)
 *   K        multiplier used to form the per-iteration table offset
 *
 * NOTE(review): real2 and the helpers are defined outside this part of the
 * file; their exact semantics are not visible here.
 * NOTE(review): assumes inShift >= LOG2VECWIDTH -- confirm at the call sites.
 * NOTE(review): 'f' presumably denotes the forward transform -- confirm.
 */
ALIGNED(8192)
void tbut16f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    /* Output position of this iteration is taken from the q[] table. */
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    /* Base offset into tbl for this iteration. */
    const int tbloffset = K * i0;
    real2 v15 = load(in, 13 << inShift);
    real2 v7 = load(in, 5 << inShift);
    real2 v45 = plus(v7, v15);
    real2 v39 = reverse(minus(v7, v15));
    real2 v3 = load(in, 1 << inShift);
    real2 v11 = load(in, 9 << inShift);
    real2 v40 = minus(v11, v3);
    real2 v44 = plus(v3, v11);
    real2 v124 = plus(v44, v45);
    real2 v120 = minus(v45, v44);
    real2 v41 = minusplus(v39, v40);
    real2 v43 = minusplus(uminus(v39), v40);
    real2 v57 = timesminusplus(reverse(v43), load(tbl, 8 * VECWIDTH + tbloffset), times(v43, load(tbl, 9 * VECWIDTH + tbloffset)));
    real2 v13 = load(in, 11 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v84 = plus(v5, v13);
    real2 v80 = minus(v13, v5);
    real2 v17 = load(in, 15 << inShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v85 = plus(v9, v17);
    real2 v79 = reverse(minus(v9, v17));
    real2 v119 = reverse(minus(v84, v85));
    real2 v125 = plus(v84, v85);
    real2 v145 = plus(v124, v125);
    real2 v139 = reverse(minus(v124, v125));
    real2 v121 = minusplus(v119, v120);
    real2 v123 = minusplus(uminus(v119), v120);
    real2 v137 = timesminusplus(reverse(v123), load(tbl, 24 * VECWIDTH + tbloffset), times(v123, load(tbl, 25 * VECWIDTH + tbloffset)));
    real2 v131 = timesminusplus(reverse(v121), load(tbl, 22 * VECWIDTH + tbloffset), times(v121, load(tbl, 23 * VECWIDTH + tbloffset)));
    real2 v4 = load(in, 2 << inShift);
    real2 v12 = load(in, 10 << inShift);
    real2 v64 = plus(v4, v12);
    real2 v60 = minus(v12, v4);
    real2 v8 = load(in, 6 << inShift);
    real2 v16 = load(in, 14 << inShift);
    real2 v65 = plus(v8, v16);
    real2 v59 = reverse(minus(v8, v16));
    /*
     * tbut16f_%CONFIG%_%ISA% continues here: the remaining loads and
     * combinations, followed by all sixteen scatter(out, j, 16, ...)
     * calls of one loop iteration.
     */
    real2 v99 = reverse(minus(v64, v65));
    real2 v105 = plus(v64, v65);
    real2 v14 = load(in, 12 << inShift);
    real2 v6 = load(in, 4 << inShift);
    real2 v25 = plus(v6, v14);
    real2 v19 = reverse(minus(v6, v14));
    real2 v10 = load(in, 8 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v20 = minus(v10, v2);
    real2 v24 = plus(v2, v10);
    real2 v104 = plus(v24, v25);
    real2 v100 = minus(v25, v24);
    real2 v140 = minus(v105, v104);
    real2 v144 = plus(v104, v105);
    scatter(out, 0, 16, plus(v144, v145));
    real2 v158 = minus(v144, v145);
    scatter(out, 8, 16, timesminusplus(v158, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v158), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v143 = minusplus(uminus(v139), v140);
    scatter(out, 12, 16, timesminusplus(reverse(v143), load(tbl, 28 * VECWIDTH + tbloffset), times(v143, load(tbl, 29 * VECWIDTH + tbloffset))));
    real2 v141 = minusplus(v139, v140);
    scatter(out, 4, 16, timesminusplus(reverse(v141), load(tbl, 26 * VECWIDTH + tbloffset), times(v141, load(tbl, 27 * VECWIDTH + tbloffset))));
    real2 v101 = minusplus(v99, v100);
    real2 v103 = minusplus(uminus(v99), v100);
    real2 v117 = timesminusplus(reverse(v103), load(tbl, 20 * VECWIDTH + tbloffset), times(v103, load(tbl, 21 * VECWIDTH + tbloffset)));
    scatter(out, 6, 16, plus(v117, v137));
    real2 v172 = minus(v117, v137);
    scatter(out, 14, 16, timesminusplus(v172, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v172), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v111 = timesminusplus(reverse(v101), load(tbl, 18 * VECWIDTH + tbloffset), times(v101, load(tbl, 19 * VECWIDTH + tbloffset)));
    scatter(out, 2, 16, plus(v111, v131));
    real2 v166 = minus(v111, v131);
    scatter(out, 10, 16, timesminusplus(v166, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v166), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v23 = minusplus(uminus(v19), v20);
    real2 v21 = minusplus(v19, v20);
    real2 v81 = minusplus(v79, v80);
    real2 v83 = minusplus(uminus(v79), v80);
    real2 v97 = timesminusplus(reverse(v83), load(tbl, 16 * VECWIDTH + tbloffset), times(v83, load(tbl, 17 * VECWIDTH + tbloffset)));
    real2 v211 = plus(v57, v97);
    real2 v205 = reverse(minus(v57, v97));
    real2 v61 = minusplus(v59, v60);
    real2 v63 = minusplus(uminus(v59), v60);
    real2 v77 = timesminusplus(reverse(v63), load(tbl, 12 * VECWIDTH + tbloffset), times(v63, load(tbl, 13 * VECWIDTH + tbloffset)));
    real2 v37 = timesminusplus(reverse(v23), load(tbl, 4 * VECWIDTH + tbloffset), times(v23, load(tbl, 5 * VECWIDTH + tbloffset)));
    real2 v210 = plus(v37, v77);
    real2 v206 = minus(v77, v37);
    scatter(out, 3, 16, plus(v210, v211));
    real2 v224 = minus(v210, v211);
    scatter(out, 11, 16, timesminusplus(v224, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v224), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v207 = minusplus(v205, v206);
    real2 v209 = minusplus(uminus(v205), v206);
    scatter(out, 15, 16, timesminusplus(reverse(v209), load(tbl, 36 * VECWIDTH + tbloffset), times(v209, load(tbl, 37 * VECWIDTH + tbloffset))));
    scatter(out, 7, 16, timesminusplus(reverse(v207), load(tbl, 34 * VECWIDTH + tbloffset), times(v207, load(tbl, 35 * VECWIDTH + tbloffset))));
    real2 v71 = timesminusplus(reverse(v61), load(tbl, 10 * VECWIDTH + tbloffset), times(v61, load(tbl, 11 * VECWIDTH + tbloffset)));
    real2 v51 = timesminusplus(reverse(v41), load(tbl, 6 * VECWIDTH + tbloffset), times(v41, load(tbl, 7 * VECWIDTH + tbloffset)));
    real2 v91 = timesminusplus(reverse(v81), load(tbl, 14 * VECWIDTH + tbloffset), times(v81, load(tbl, 15 * VECWIDTH + tbloffset)));
    real2 v185 = plus(v51, v91);
    real2 v179 = reverse(minus(v51, v91));
    real2 v31 = timesminusplus(reverse(v21), load(tbl, 2 * VECWIDTH + tbloffset), times(v21, load(tbl, 3 * VECWIDTH + tbloffset)));
    real2 v184 = plus(v31, v71);
    real2 v180 = minus(v71, v31);
    scatter(out, 1, 16, plus(v184, v185));
    real2 v198 = minus(v184, v185);
    scatter(out, 9, 16, timesminusplus(v198, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v198), load(tbl, 1 * VECWIDTH + tbloffset))));
    real2 v181 = minusplus(v179, v180);
    scatter(out, 5, 16, timesminusplus(reverse(v181), load(tbl, 30 * VECWIDTH + tbloffset), times(v181, load(tbl, 31 * VECWIDTH + tbloffset))));
    real2 v183 = minusplus(uminus(v179), v180);
    scatter(out, 13, 16, timesminusplus(reverse(v183), load(tbl, 32 * VECWIDTH + tbloffset), times(v183, load(tbl, 33 * VECWIDTH + tbloffset))));
  }
}

/*
 * tbut16b_%CONFIG%_%ISA%: 'b' counterpart of tbut16f_%CONFIG%_%ISA%, with
 * the same signature, loop structure, table offsets
 * (load(tbl, n * VECWIDTH + tbloffset), n = 0..37, tbloffset = K * i0) and
 * scatter(out, j, 16, ...) outputs (j = 0..15).
 *
 * The visible difference from the 'f' variant appears to be the swapped
 * operand order of each minus() nested directly inside reverse().
 *
 * Parameters:
 *   out0     base pointer of the output buffer
 *   q        per-iteration offsets; q[i] is added to out0
 *   in0      base pointer of the input buffer
 *   inShift  shift applied to the input slot index; also sets the trip count
 *   tbl      coefficient table (presumably twiddle factors -- TODO confirm)
 *   K        multiplier used to form the per-iteration table offset
 *
 * NOTE(review): assumes inShift >= LOG2VECWIDTH -- confirm at the call sites.
 * NOTE(review): 'b' presumably denotes the backward transform -- confirm.
 */
ALIGNED(8192)
void tbut16b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    int i0 = i << LOG2VECWIDTH;
    real *out = out0 + q[i];
    const real *in = in0 + i0*2;
    const int tbloffset = K * i0;
    real2 v15 = load(in, 13 << inShift);
    real2 v7 = load(in, 5 << inShift);
    real2 v45 = plus(v7, v15);
    real2 v39 = reverse(minus(v15, v7));
    real2 v3 = load(in, 1 << inShift);
    real2 v11 = load(in, 9 << inShift);
    real2 v40 = minus(v11, v3);
    real2 v44 = plus(v3, v11);
    real2 v124 = plus(v44, v45);
    real2 v120 = minus(v45, v44);
    real2 v41 = minusplus(v39, v40);
    real2 v43 = minusplus(uminus(v39), v40);
    real2 v57 = timesminusplus(reverse(v43), load(tbl, 8 * VECWIDTH + tbloffset), times(v43, load(tbl, 9 * VECWIDTH + tbloffset)));
    real2 v13 = load(in, 11 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v84 = plus(v5, v13);
    real2 v80 = minus(v13, v5);
    real2 v17 = load(in, 15 << inShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v85 = plus(v9, v17);
    real2 v79 = reverse(minus(v17, v9));
    real2 v119 = reverse(minus(v85, v84));
    real2 v125 = plus(v84, v85);
    real2 v145 = plus(v124, v125);
    real2 v139 = reverse(minus(v125, v124));
    real2 v121 = minusplus(v119, v120);
    real2 v123 = minusplus(uminus(v119), v120);
    real2 v137 = timesminusplus(reverse(v123), load(tbl, 24 * VECWIDTH + tbloffset), times(v123, load(tbl, 25 * VECWIDTH + tbloffset)));
    real2 v131 = timesminusplus(reverse(v121), load(tbl, 22 * VECWIDTH + tbloffset), times(v121, load(tbl, 23 * VECWIDTH + tbloffset)));
    real2 v4 = load(in, 2 <<
inShift); real2 v12 = load(in, 10 << inShift); real2 v64 = plus(v4, v12); real2 v60 = minus(v12, v4); real2 v8 = load(in, 6 << inShift); real2 v16 = load(in, 14 << inShift); real2 v65 = plus(v8, v16); real2 v59 = reverse(minus(v16, v8)); real2 v99 = reverse(minus(v65, v64)); real2 v105 = plus(v64, v65); real2 v14 = load(in, 12 << inShift); real2 v6 = load(in, 4 << inShift); real2 v25 = plus(v6, v14); real2 v19 = reverse(minus(v14, v6)); real2 v10 = load(in, 8 << inShift); real2 v2 = load(in, 0 << inShift); real2 v20 = minus(v10, v2); real2 v24 = plus(v2, v10); real2 v104 = plus(v24, v25); real2 v100 = minus(v25, v24); real2 v140 = minus(v105, v104); real2 v144 = plus(v104, v105); scatter(out, 0, 16, plus(v144, v145)); real2 v158 = minus(v144, v145); scatter(out, 8, 16, timesminusplus(v158, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v158), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v143 = minusplus(uminus(v139), v140); scatter(out, 12, 16, timesminusplus(reverse(v143), load(tbl, 28 * VECWIDTH + tbloffset), times(v143, load(tbl, 29 * VECWIDTH + tbloffset)))); real2 v141 = minusplus(v139, v140); scatter(out, 4, 16, timesminusplus(reverse(v141), load(tbl, 26 * VECWIDTH + tbloffset), times(v141, load(tbl, 27 * VECWIDTH + tbloffset)))); real2 v101 = minusplus(v99, v100); real2 v103 = minusplus(uminus(v99), v100); real2 v117 = timesminusplus(reverse(v103), load(tbl, 20 * VECWIDTH + tbloffset), times(v103, load(tbl, 21 * VECWIDTH + tbloffset))); scatter(out, 6, 16, plus(v117, v137)); real2 v172 = minus(v117, v137); scatter(out, 14, 16, timesminusplus(v172, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v172), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v111 = timesminusplus(reverse(v101), load(tbl, 18 * VECWIDTH + tbloffset), times(v101, load(tbl, 19 * VECWIDTH + tbloffset))); scatter(out, 2, 16, plus(v111, v131)); real2 v166 = minus(v111, v131); scatter(out, 10, 16, timesminusplus(v166, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v166), load(tbl, 1 
* VECWIDTH + tbloffset)))); real2 v23 = minusplus(uminus(v19), v20); real2 v21 = minusplus(v19, v20); real2 v81 = minusplus(v79, v80); real2 v83 = minusplus(uminus(v79), v80); real2 v97 = timesminusplus(reverse(v83), load(tbl, 16 * VECWIDTH + tbloffset), times(v83, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v211 = plus(v57, v97); real2 v205 = reverse(minus(v97, v57)); real2 v61 = minusplus(v59, v60); real2 v63 = minusplus(uminus(v59), v60); real2 v77 = timesminusplus(reverse(v63), load(tbl, 12 * VECWIDTH + tbloffset), times(v63, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v37 = timesminusplus(reverse(v23), load(tbl, 4 * VECWIDTH + tbloffset), times(v23, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v210 = plus(v37, v77); real2 v206 = minus(v77, v37); scatter(out, 3, 16, plus(v210, v211)); real2 v224 = minus(v210, v211); scatter(out, 11, 16, timesminusplus(v224, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v224), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v207 = minusplus(v205, v206); real2 v209 = minusplus(uminus(v205), v206); scatter(out, 15, 16, timesminusplus(reverse(v209), load(tbl, 36 * VECWIDTH + tbloffset), times(v209, load(tbl, 37 * VECWIDTH + tbloffset)))); scatter(out, 7, 16, timesminusplus(reverse(v207), load(tbl, 34 * VECWIDTH + tbloffset), times(v207, load(tbl, 35 * VECWIDTH + tbloffset)))); real2 v71 = timesminusplus(reverse(v61), load(tbl, 10 * VECWIDTH + tbloffset), times(v61, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v51 = timesminusplus(reverse(v41), load(tbl, 6 * VECWIDTH + tbloffset), times(v41, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v91 = timesminusplus(reverse(v81), load(tbl, 14 * VECWIDTH + tbloffset), times(v81, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v185 = plus(v51, v91); real2 v179 = reverse(minus(v91, v51)); real2 v31 = timesminusplus(reverse(v21), load(tbl, 2 * VECWIDTH + tbloffset), times(v21, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v184 = plus(v31, v71); real2 v180 = minus(v71, v31); scatter(out, 1, 
16, plus(v184, v185)); real2 v198 = minus(v184, v185); scatter(out, 9, 16, timesminusplus(v198, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v198), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v181 = minusplus(v179, v180); scatter(out, 5, 16, timesminusplus(reverse(v181), load(tbl, 30 * VECWIDTH + tbloffset), times(v181, load(tbl, 31 * VECWIDTH + tbloffset)))); real2 v183 = minusplus(uminus(v179), v180); scatter(out, 13, 16, timesminusplus(reverse(v183), load(tbl, 32 * VECWIDTH + tbloffset), times(v183, load(tbl, 33 * VECWIDTH + tbloffset)))); } } #endif #if MAXBUTWIDTH >= 5 /* dft32f_%CONFIG%_%ISA%: 32-input kernel (the "f" suffix presumably means "forward" -- confirm against the generator).
 * Compiled only under the `#if MAXBUTWIDTH >= 5` guard that precedes it.
 * %CONFIG% and %ISA% are literal placeholders in this template; presumably substituted at build time -- TODO confirm.
 * Parameters:
 *   out0  - output base; iteration i stores through out = out0 + (i << LOG2VECWIDTH)*2.
 *   in0   - input base; iteration i loads through in = in0 + (i << LOG2VECWIDTH)*2.
 *   shift - sets the trip count k = 1 << (shift - LOG2VECWIDTH) and is the left shift applied to
 *           every element index handed to load()/store().
 * Each iteration load()s the 32 elements with indices (0..31) << shift, combines them with
 * plus/minus/reverse/uminus/minusplus/uplusminus/uminusplus and ctimes/ctimesminusplus against
 * ctbl[] constants (ctbl is not declared in this function), and store()s 32 results at indices
 * (0..31) << shift. All temporaries are declared inside the loop body; the loop carries
 * `#pragma omp parallel for`.
 * NOTE(review): shift < LOG2VECWIDTH would make the shift count negative, which is undefined in C --
 * presumably callers guarantee shift >= LOG2VECWIDTH; verify. */ ALIGNED(8192) void dft32f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; real2 v15 = load(in, 13 << shift); real2 v31 = load(in, 29 << shift); real2 v124 = reverse(minus(v15, v31)); real2 v130 = plus(v15, v31); real2 v23 = load(in, 21 << shift); real2 v7 = load(in, 5 << shift); real2 v129 = plus(v7, v23); real2 v125 = minus(v23, v7); real2 v193 = reverse(minus(v129, v130)); real2 v199 = plus(v129, v130); real2 v126 = minusplus(v124, v125); real2 v128 = minusplus(uminus(v124), v125); real2 v139 = ctimesminusplus(reverse(v128), ctbl[7], ctimes(v128, ctbl[6])); real2 v134 = ctimesminusplus(reverse(v126), ctbl[9], ctimes(v126, ctbl[11])); real2 v19 = load(in, 17 << shift); real2 v3 = load(in, 1 << shift); real2 v52 = minus(v19, v3); real2 v56 = plus(v3, v19); real2 v27 = load(in, 25 << shift); real2 v11 = load(in, 9 << shift); real2 v51 = reverse(minus(v11, v27)); real2 v57 = plus(v11, v27); real2 v194 = minus(v57, v56); real2 v198 = plus(v56, v57); real2 v53 = minusplus(v51, v52); real2 v55 = minusplus(uminus(v51), v52); real2 v69 = ctimesminusplus(reverse(v55), ctbl[11], ctimes(v55, ctbl[9])); real2 v262 = plus(v198, v199); real2 v258 = minus(v199, v198); real2 v195 = 
minusplus(v193, v194); real2 v197 = minusplus(uminus(v193), v194); real2 v207 = ctimesminusplus(reverse(v197), ctbl[5], ctimes(v197, ctbl[3])); real2 v414 = plus(v69, v139); real2 v410 = minus(v139, v69); real2 v203 = ctimesminusplus(reverse(v195), ctbl[3], ctimes(v195, ctbl[5])); real2 v17 = load(in, 15 << shift); real2 v33 = load(in, 31 << shift); real2 v159 = reverse(minus(v17, v33)); real2 v165 = plus(v17, v33); real2 v25 = load(in, 23 << shift); real2 v9 = load(in, 7 << shift); real2 v164 = plus(v9, v25); real2 v160 = minus(v25, v9); real2 v231 = plus(v164, v165); real2 v225 = reverse(minus(v164, v165)); real2 v161 = minusplus(v159, v160); real2 v163 = minusplus(uminus(v159), v160); real2 v175 = ctimesminusplus(reverse(v163), ctbl[10], ctimes(v163, ctbl[8])); real2 v13 = load(in, 11 << shift); real2 v29 = load(in, 27 << shift); real2 v95 = plus(v13, v29); real2 v89 = reverse(minus(v13, v29)); real2 v21 = load(in, 19 << shift); real2 v5 = load(in, 3 << shift); real2 v90 = minus(v21, v5); real2 v94 = plus(v5, v21); real2 v226 = minus(v95, v94); real2 v230 = plus(v94, v95); real2 v229 = minusplus(uminus(v225), v226); real2 v227 = minusplus(v225, v226); real2 v239 = ctimesminusplus(reverse(v229), ctbl[4], ctimes(v229, ctbl[2])); real2 v257 = reverse(minus(v230, v231)); real2 v263 = plus(v230, v231); real2 v235 = ctimesminusplus(reverse(v227), ctbl[5], ctimes(v227, ctbl[3])); real2 v261 = minusplus(uminus(v257), v258); real2 v259 = minusplus(v257, v258); real2 v267 = ctimesminusplus(reverse(v259), ctbl[1], ctimes(v259, ctbl[1])); real2 v298 = reverse(minus(v203, v235)); real2 v304 = plus(v203, v235); real2 v271 = ctimesminusplus(reverse(v261), ctbl[1], ctimes(v261, ctbl[0])); real2 v279 = plus(v262, v263); real2 v273 = reverse(minus(v262, v263)); real2 v317 = reverse(minus(v207, v239)); real2 v323 = plus(v207, v239); real2 v8 = load(in, 6 << shift); real2 v24 = load(in, 22 << shift); real2 v146 = plus(v8, v24); real2 v142 = minus(v24, v8); real2 v28 = load(in, 26 
<< shift); real2 v12 = load(in, 10 << shift); real2 v77 = plus(v12, v28); real2 v71 = reverse(minus(v12, v28)); real2 v16 = load(in, 14 << shift); real2 v32 = load(in, 30 << shift); real2 v147 = plus(v16, v32); real2 v141 = reverse(minus(v16, v32)); real2 v209 = reverse(minus(v146, v147)); real2 v215 = plus(v146, v147); real2 v20 = load(in, 18 << shift); real2 v4 = load(in, 2 << shift); real2 v72 = minus(v20, v4); real2 v76 = plus(v4, v20); real2 v214 = plus(v76, v77); real2 v210 = minus(v77, v76); real2 v247 = plus(v214, v215); real2 v241 = reverse(minus(v214, v215)); real2 v213 = minusplus(uminus(v209), v210); real2 v211 = minusplus(v209, v210); real2 v223 = ctimesminusplus(reverse(v213), ctbl[1], ctimes(v213, ctbl[0])); real2 v219 = ctimesminusplus(reverse(v211), ctbl[1], ctimes(v211, ctbl[1])); real2 v26 = load(in, 24 << shift); real2 v10 = load(in, 8 << shift); real2 v35 = reverse(minus(v10, v26)); real2 v41 = plus(v10, v26); real2 v2 = load(in, 0 << shift); real2 v18 = load(in, 16 << shift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v178 = minus(v41, v40); real2 v182 = plus(v40, v41); real2 v6 = load(in, 4 << shift); real2 v22 = load(in, 20 << shift); real2 v107 = minus(v22, v6); real2 v111 = plus(v6, v22); real2 v14 = load(in, 12 << shift); real2 v30 = load(in, 28 << shift); real2 v112 = plus(v14, v30); real2 v106 = reverse(minus(v14, v30)); real2 v177 = reverse(minus(v111, v112)); real2 v183 = plus(v111, v112); real2 v191 = minus(uminusplus(v177), v178); real2 v187 = minus(uplusminus(v177), v178); real2 v322 = plus(v191, v223); real2 v318 = minus(v223, v191); store(out, 22 << shift, minus(v322, v323)); store(out, 6 << shift, plus(v322, v323)); store(out, 14 << shift, minus(uplusminus(v317), v318)); store(out, 30 << shift, minus(uminusplus(v317), v318)); real2 v246 = plus(v182, v183); real2 v242 = minus(v183, v182); real2 v274 = minus(v247, v246); store(out, 24 << shift, minus(uminusplus(v273), v274)); store(out, 8 << shift, 
minus(uplusminus(v273), v274)); real2 v278 = plus(v246, v247); store(out, 16 << shift, minus(v278, v279)); store(out, 0 << shift, plus(v278, v279)); real2 v303 = plus(v187, v219); store(out, 2 << shift, plus(v303, v304)); store(out, 18 << shift, minus(v303, v304)); real2 v299 = minus(v219, v187); store(out, 26 << shift, minus(uminusplus(v298), v299)); store(out, 10 << shift, minus(uplusminus(v298), v299)); real2 v255 = minus(uminusplus(v241), v242); real2 v251 = minus(uplusminus(v241), v242); store(out, 20 << shift, minus(v251, v267)); store(out, 4 << shift, plus(v251, v267)); store(out, 28 << shift, minus(v255, v271)); store(out, 12 << shift, plus(v255, v271)); real2 v75 = minusplus(uminus(v71), v72); real2 v73 = minusplus(v71, v72); real2 v143 = minusplus(v141, v142); real2 v145 = minusplus(uminus(v141), v142); real2 v157 = ctimesminusplus(reverse(v145), ctbl[4], ctimes(v145, ctbl[2])); real2 v87 = ctimesminusplus(reverse(v75), ctbl[5], ctimes(v75, ctbl[3])); real2 v91 = minusplus(v89, v90); real2 v93 = minusplus(uminus(v89), v90); real2 v104 = ctimesminusplus(reverse(v93), ctbl[13], ctimes(v93, ctbl[12])); real2 v399 = plus(v87, v157); real2 v393 = reverse(minus(v87, v157)); real2 v110 = minusplus(uminus(v106), v107); real2 v108 = minusplus(v106, v107); real2 v415 = plus(v104, v175); real2 v409 = reverse(minus(v104, v175)); real2 v411 = minusplus(v409, v410); real2 v413 = minusplus(uminus(v409), v410); real2 v49 = minus(uminusplus(v35), v36); real2 v45 = minus(uplusminus(v35), v36); real2 v122 = ctimesminusplus(reverse(v110), ctbl[1], ctimes(v110, ctbl[0])); real2 v423 = ctimesminusplus(reverse(v413), ctbl[1], ctimes(v413, ctbl[0])); real2 v398 = plus(v49, v122); real2 v394 = minus(v122, v49); real2 v407 = minus(uminusplus(v393), v394); store(out, 15 << shift, plus(v407, v423)); store(out, 31 << shift, minus(v407, v423)); real2 v403 = minus(uplusminus(v393), v394); real2 v419 = ctimesminusplus(reverse(v411), ctbl[1], ctimes(v411, ctbl[1])); store(out, 7 << 
shift, plus(v403, v419)); store(out, 23 << shift, minus(v403, v419)); real2 v431 = plus(v414, v415); real2 v425 = reverse(minus(v414, v415)); real2 v430 = plus(v398, v399); store(out, 19 << shift, minus(v430, v431)); store(out, 3 << shift, plus(v430, v431)); real2 v426 = minus(v399, v398); store(out, 27 << shift, minus(uminusplus(v425), v426)); store(out, 11 << shift, minus(uplusminus(v425), v426)); real2 v63 = ctimesminusplus(reverse(v53), ctbl[7], ctimes(v53, ctbl[13])); real2 v151 = ctimesminusplus(reverse(v143), ctbl[5], ctimes(v143, ctbl[3])); real2 v99 = ctimesminusplus(reverse(v91), ctbl[11], ctimes(v91, ctbl[9])); real2 v169 = ctimesminusplus(reverse(v161), ctbl[13], ctimes(v161, ctbl[7])); real2 v352 = reverse(minus(v99, v169)); real2 v358 = plus(v99, v169); real2 v357 = plus(v63, v134); real2 v353 = minus(v134, v63); real2 v117 = ctimesminusplus(reverse(v108), ctbl[1], ctimes(v108, ctbl[1])); real2 v374 = plus(v357, v358); real2 v368 = reverse(minus(v357, v358)); real2 v83 = ctimesminusplus(reverse(v73), ctbl[3], ctimes(v73, ctbl[5])); real2 v336 = reverse(minus(v83, v151)); real2 v342 = plus(v83, v151); real2 v341 = plus(v45, v117); real2 v337 = minus(v117, v45); real2 v373 = plus(v341, v342); real2 v369 = minus(v342, v341); store(out, 9 << shift, minus(uplusminus(v368), v369)); store(out, 25 << shift, minus(uminusplus(v368), v369)); store(out, 17 << shift, minus(v373, v374)); store(out, 1 << shift, plus(v373, v374)); real2 v354 = minusplus(v352, v353); real2 v356 = minusplus(uminus(v352), v353); real2 v362 = ctimesminusplus(reverse(v354), ctbl[1], ctimes(v354, ctbl[1])); real2 v346 = minus(uplusminus(v336), v337); store(out, 21 << shift, minus(v346, v362)); store(out, 5 << shift, plus(v346, v362)); real2 v350 = minus(uminusplus(v336), v337); real2 v366 = ctimesminusplus(reverse(v356), ctbl[1], ctimes(v356, ctbl[0])); store(out, 29 << shift, minus(v350, v366)); store(out, 13 << shift, plus(v350, v366)); } } /* dft32b_%CONFIG%_%ISA%: 32-input kernel (the "b" suffix presumably means "backward"/inverse -- confirm against the generator).
 * %CONFIG% and %ISA% are literal placeholders in this template; presumably substituted at build time -- TODO confirm.
 * Parameters:
 *   out0  - output base; iteration i stores through out = out0 + (i << LOG2VECWIDTH)*2.
 *   in0   - input base; iteration i loads through in = in0 + (i << LOG2VECWIDTH)*2.
 *   shift - sets the trip count k = 1 << (shift - LOG2VECWIDTH) and is the left shift applied to
 *           every element index handed to load()/store().
 * Each iteration load()s the 32 elements with indices (0..31) << shift, combines them with
 * plus/minus/reverse/uminus/minusplus/uplusminus/uminusplus and ctimes/ctimesminusplus against
 * ctbl[] constants (ctbl is not declared in this function), and store()s 32 results at indices
 * (0..31) << shift. Differences inside reverse(...) are formed as minus(later-loaded index, earlier
 * index), e.g. reverse(minus(v30, v14)). All temporaries are declared inside the loop body; the loop
 * carries `#pragma omp parallel for`.
 * NOTE(review): shift < LOG2VECWIDTH would make the shift count negative, which is undefined in C --
 * presumably callers guarantee shift >= LOG2VECWIDTH; verify. */ ALIGNED(8192) void dft32b_%CONFIG%_%ISA%(real 
*RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; real2 v6 = load(in, 4 << shift); real2 v22 = load(in, 20 << shift); real2 v109 = minus(v22, v6); real2 v113 = plus(v6, v22); real2 v14 = load(in, 12 << shift); real2 v30 = load(in, 28 << shift); real2 v108 = reverse(minus(v30, v14)); real2 v114 = plus(v14, v30); real2 v183 = plus(v113, v114); real2 v177 = reverse(minus(v114, v113)); real2 v110 = minusplus(v108, v109); real2 v112 = minusplus(uminus(v108), v109); real2 v120 = ctimesminusplus(reverse(v110), ctbl[0], ctimes(v110, ctbl[1])); real2 v124 = ctimesminusplus(reverse(v112), ctbl[0], ctimes(v112, ctbl[0])); real2 v10 = load(in, 8 << shift); real2 v26 = load(in, 24 << shift); real2 v35 = reverse(minus(v26, v10)); real2 v41 = plus(v10, v26); real2 v18 = load(in, 16 << shift); real2 v2 = load(in, 0 << shift); real2 v36 = minus(v18, v2); real2 v40 = plus(v2, v18); real2 v178 = minus(v41, v40); real2 v182 = plus(v40, v41); real2 v45 = minus(uplusminus(v35), v36); real2 v49 = minus(uminusplus(v35), v36); real2 v398 = plus(v49, v124); real2 v394 = minus(v124, v49); real2 v242 = minus(v183, v182); real2 v246 = plus(v182, v183); real2 v341 = plus(v45, v120); real2 v337 = minus(v120, v45); real2 v187 = minus(uplusminus(v177), v178); real2 v191 = minus(uminusplus(v177), v178); real2 v7 = load(in, 5 << shift); real2 v23 = load(in, 21 << shift); real2 v131 = plus(v7, v23); real2 v127 = minus(v23, v7); real2 v15 = load(in, 13 << shift); real2 v31 = load(in, 29 << shift); real2 v126 = reverse(minus(v31, v15)); real2 v132 = plus(v15, v31); real2 v199 = plus(v131, v132); real2 v193 = reverse(minus(v132, v131)); real2 v128 = minusplus(v126, v127); real2 v130 = minusplus(uminus(v126), v127); real2 v138 = ctimesminusplus(reverse(v128), ctbl[10], ctimes(v128, ctbl[11])); real2 v21 = load(in, 19 
<< shift); real2 v5 = load(in, 3 << shift); real2 v92 = minus(v21, v5); real2 v96 = plus(v5, v21); real2 v29 = load(in, 27 << shift); real2 v13 = load(in, 11 << shift); real2 v97 = plus(v13, v29); real2 v91 = reverse(minus(v29, v13)); real2 v95 = minusplus(uminus(v91), v92); real2 v93 = minusplus(v91, v92); real2 v230 = plus(v96, v97); real2 v226 = minus(v97, v96); real2 v17 = load(in, 15 << shift); real2 v33 = load(in, 31 << shift); real2 v166 = plus(v17, v33); real2 v160 = reverse(minus(v33, v17)); real2 v9 = load(in, 7 << shift); real2 v25 = load(in, 23 << shift); real2 v161 = minus(v25, v9); real2 v165 = plus(v9, v25); real2 v231 = plus(v165, v166); real2 v225 = reverse(minus(v166, v165)); real2 v263 = plus(v230, v231); real2 v257 = reverse(minus(v231, v230)); real2 v229 = minusplus(uminus(v225), v226); real2 v227 = minusplus(v225, v226); real2 v235 = ctimesminusplus(reverse(v227), ctbl[2], ctimes(v227, ctbl[3])); real2 v3 = load(in, 1 << shift); real2 v19 = load(in, 17 << shift); real2 v52 = minus(v19, v3); real2 v56 = plus(v3, v19); real2 v27 = load(in, 25 << shift); real2 v11 = load(in, 9 << shift); real2 v51 = reverse(minus(v27, v11)); real2 v57 = plus(v11, v27); real2 v198 = plus(v56, v57); real2 v194 = minus(v57, v56); real2 v258 = minus(v199, v198); real2 v262 = plus(v198, v199); real2 v273 = reverse(minus(v263, v262)); real2 v279 = plus(v262, v263); real2 v259 = minusplus(v257, v258); real2 v261 = minusplus(uminus(v257), v258); real2 v271 = ctimesminusplus(reverse(v261), ctbl[0], ctimes(v261, ctbl[0])); real2 v197 = minusplus(uminus(v193), v194); real2 v195 = minusplus(v193, v194); real2 v203 = ctimesminusplus(reverse(v195), ctbl[4], ctimes(v195, ctbl[5])); real2 v298 = reverse(minus(v235, v203)); real2 v304 = plus(v203, v235); real2 v267 = ctimesminusplus(reverse(v259), ctbl[0], ctimes(v259, ctbl[1])); real2 v4 = load(in, 2 << shift); real2 v20 = load(in, 18 << shift); real2 v72 = minus(v20, v4); real2 v76 = plus(v4, v20); real2 v28 = load(in, 26 << 
shift); real2 v12 = load(in, 10 << shift); real2 v71 = reverse(minus(v28, v12)); real2 v77 = plus(v12, v28); real2 v210 = minus(v77, v76); real2 v214 = plus(v76, v77); real2 v32 = load(in, 30 << shift); real2 v16 = load(in, 14 << shift); real2 v150 = plus(v16, v32); real2 v144 = reverse(minus(v32, v16)); real2 v8 = load(in, 6 << shift); real2 v24 = load(in, 22 << shift); real2 v149 = plus(v8, v24); real2 v145 = minus(v24, v8); real2 v215 = plus(v149, v150); real2 v209 = reverse(minus(v150, v149)); real2 v241 = reverse(minus(v215, v214)); real2 v247 = plus(v214, v215); real2 v251 = minus(uplusminus(v241), v242); real2 v255 = minus(uminusplus(v241), v242); store(out, 12 << shift, plus(v255, v271)); store(out, 28 << shift, minus(v255, v271)); store(out, 4 << shift, plus(v251, v267)); store(out, 20 << shift, minus(v251, v267)); real2 v278 = plus(v246, v247); real2 v274 = minus(v247, v246); store(out, 24 << shift, minus(uminusplus(v273), v274)); store(out, 8 << shift, minus(uplusminus(v273), v274)); store(out, 16 << shift, minus(v278, v279)); store(out, 0 << shift, plus(v278, v279)); real2 v211 = minusplus(v209, v210); real2 v213 = minusplus(uminus(v209), v210); real2 v219 = ctimesminusplus(reverse(v211), ctbl[0], ctimes(v211, ctbl[1])); real2 v299 = minus(v219, v187); real2 v303 = plus(v187, v219); store(out, 2 << shift, plus(v303, v304)); store(out, 18 << shift, minus(v303, v304)); store(out, 10 << shift, minus(uplusminus(v298), v299)); store(out, 26 << shift, minus(uminusplus(v298), v299)); real2 v223 = ctimesminusplus(reverse(v213), ctbl[0], ctimes(v213, ctbl[0])); real2 v322 = plus(v191, v223); real2 v318 = minus(v223, v191); real2 v239 = ctimesminusplus(reverse(v229), ctbl[3], ctimes(v229, ctbl[2])); real2 v207 = ctimesminusplus(reverse(v197), ctbl[2], ctimes(v197, ctbl[3])); real2 v317 = reverse(minus(v239, v207)); store(out, 30 << shift, minus(uminusplus(v317), v318)); store(out, 14 << shift, minus(uplusminus(v317), v318)); real2 v323 = plus(v207, v239); 
store(out, 6 << shift, plus(v322, v323)); store(out, 22 << shift, minus(v322, v323)); real2 v101 = ctimesminusplus(reverse(v93), ctbl[8], ctimes(v93, ctbl[9])); real2 v75 = minusplus(uminus(v71), v72); real2 v73 = minusplus(v71, v72); real2 v83 = ctimesminusplus(reverse(v73), ctbl[4], ctimes(v73, ctbl[5])); real2 v162 = minusplus(v160, v161); real2 v164 = minusplus(uminus(v160), v161); real2 v55 = minusplus(uminus(v51), v52); real2 v53 = minusplus(v51, v52); real2 v171 = ctimesminusplus(reverse(v162), ctbl[6], ctimes(v162, ctbl[7])); real2 v352 = reverse(minus(v171, v101)); real2 v358 = plus(v101, v171); real2 v63 = ctimesminusplus(reverse(v53), ctbl[12], ctimes(v53, ctbl[13])); real2 v146 = minusplus(v144, v145); real2 v148 = minusplus(uminus(v144), v145); real2 v154 = ctimesminusplus(reverse(v146), ctbl[2], ctimes(v146, ctbl[3])); real2 v342 = plus(v83, v154); real2 v336 = reverse(minus(v154, v83)); real2 v373 = plus(v341, v342); real2 v369 = minus(v342, v341); real2 v353 = minus(v138, v63); real2 v357 = plus(v63, v138); real2 v374 = plus(v357, v358); store(out, 1 << shift, plus(v373, v374)); store(out, 17 << shift, minus(v373, v374)); real2 v368 = reverse(minus(v358, v357)); store(out, 25 << shift, minus(uminusplus(v368), v369)); store(out, 9 << shift, minus(uplusminus(v368), v369)); real2 v346 = minus(uplusminus(v336), v337); real2 v350 = minus(uminusplus(v336), v337); real2 v356 = minusplus(uminus(v352), v353); real2 v354 = minusplus(v352, v353); real2 v362 = ctimesminusplus(reverse(v354), ctbl[0], ctimes(v354, ctbl[1])); store(out, 21 << shift, minus(v346, v362)); store(out, 5 << shift, plus(v346, v362)); real2 v366 = ctimesminusplus(reverse(v356), ctbl[0], ctimes(v356, ctbl[0])); store(out, 13 << shift, plus(v350, v366)); store(out, 29 << shift, minus(v350, v366)); real2 v89 = ctimesminusplus(reverse(v75), ctbl[2], ctimes(v75, ctbl[3])); real2 v106 = ctimesminusplus(reverse(v95), ctbl[6], ctimes(v95, ctbl[12])); real2 v142 = ctimesminusplus(reverse(v130), 
ctbl[12], ctimes(v130, ctbl[6])); real2 v158 = ctimesminusplus(reverse(v148), ctbl[3], ctimes(v148, ctbl[2])); real2 v393 = reverse(minus(v158, v89)); real2 v399 = plus(v89, v158); real2 v403 = minus(uplusminus(v393), v394); real2 v407 = minus(uminusplus(v393), v394); real2 v175 = ctimesminusplus(reverse(v164), ctbl[9], ctimes(v164, ctbl[8])); real2 v415 = plus(v106, v175); real2 v409 = reverse(minus(v175, v106)); real2 v69 = ctimesminusplus(reverse(v55), ctbl[8], ctimes(v55, ctbl[9])); real2 v414 = plus(v69, v142); real2 v410 = minus(v142, v69); real2 v411 = minusplus(v409, v410); real2 v413 = minusplus(uminus(v409), v410); real2 v419 = ctimesminusplus(reverse(v411), ctbl[0], ctimes(v411, ctbl[1])); store(out, 23 << shift, minus(v403, v419)); store(out, 7 << shift, plus(v403, v419)); real2 v423 = ctimesminusplus(reverse(v413), ctbl[0], ctimes(v413, ctbl[0])); store(out, 15 << shift, plus(v407, v423)); store(out, 31 << shift, minus(v407, v423)); real2 v431 = plus(v414, v415); real2 v425 = reverse(minus(v415, v414)); real2 v430 = plus(v398, v399); real2 v426 = minus(v399, v398); store(out, 27 << shift, minus(uminusplus(v425), v426)); store(out, 11 << shift, minus(uplusminus(v425), v426)); store(out, 19 << shift, minus(v430, v431)); store(out, 3 << shift, plus(v430, v431)); } } /* but32f_%CONFIG%_%ISA%: 32-input kernel that additionally multiplies by per-call table entries
 * (presumably a forward butterfly stage with twiddle factors -- confirm against the generator).
 * %CONFIG% and %ISA% are literal placeholders in this template; presumably substituted at build time -- TODO confirm.
 * Parameters:
 *   out0     - output base; iteration i stores through out = out0 + q[i].
 *   q        - per-iteration output offsets; only q[i] for i in [0, k) is read here.
 *   outShift - left shift applied to every element index handed to store(); also used in
 *              tbloffset = K * ((i << LOG2VECWIDTH) >> outShift).
 *   in0      - input base; iteration i loads through in = in0 + (i << LOG2VECWIDTH)*2.
 *   inShift  - sets the trip count k = 1 << (inShift - LOG2VECWIDTH) and is the left shift applied
 *              to every element index handed to load().
 *   tbl      - table of real coefficients, read as tbl[n + tbloffset]; the largest n visible in this
 *              function is 93.
 *   K        - multiplier used to form tbloffset (presumably the table stride per block -- verify).
 * Each iteration load()s the 32 elements with indices (0..31) << inShift, combines them with
 * plus/minus/reverse/uminus/minusplus and ctimes/ctimesminusplus against tbl[] entries, and store()s
 * 32 results at indices (0..31) << outShift. All temporaries are declared inside the loop body; the
 * loop carries `#pragma omp parallel for`.
 * NOTE(review): inShift < LOG2VECWIDTH would make the shift count negative, which is undefined in C --
 * presumably callers guarantee inShift >= LOG2VECWIDTH; verify. */ ALIGNED(8192) void but32f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); real2 v14 = load(in, 12 << inShift); real2 v30 = load(in, 28 << inShift); real2 v115 = reverse(minus(v14, v30)); real2 v121 = plus(v14, v30); real2 v6 = load(in, 4 << inShift); real2 v22 = load(in, 20 << inShift); real2 v120 = plus(v6, v22); real2 v116 = minus(v22, v6); real2 v201 = plus(v120, 
v121); real2 v195 = reverse(minus(v120, v121)); real2 v119 = minusplus(uminus(v115), v116); real2 v117 = minusplus(v115, v116); real2 v133 = ctimesminusplus(reverse(v119), tbl[20 + tbloffset], ctimes(v119, tbl[21 + tbloffset])); real2 v127 = ctimesminusplus(reverse(v117), tbl[18 + tbloffset], ctimes(v117, tbl[19 + tbloffset])); real2 v18 = load(in, 16 << inShift); real2 v2 = load(in, 0 << inShift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v10 = load(in, 8 << inShift); real2 v26 = load(in, 24 << inShift); real2 v41 = plus(v10, v26); real2 v35 = reverse(minus(v10, v26)); real2 v200 = plus(v40, v41); real2 v196 = minus(v41, v40); real2 v37 = minusplus(v35, v36); real2 v39 = minusplus(uminus(v35), v36); real2 v53 = ctimesminusplus(reverse(v39), tbl[4 + tbloffset], ctimes(v39, tbl[5 + tbloffset])); real2 v276 = minus(v201, v200); real2 v280 = plus(v200, v201); real2 v47 = ctimesminusplus(reverse(v37), tbl[2 + tbloffset], ctimes(v37, tbl[3 + tbloffset])); real2 v199 = minusplus(uminus(v195), v196); real2 v197 = minusplus(v195, v196); real2 v486 = minus(v133, v53); real2 v490 = plus(v53, v133); real2 v213 = ctimesminusplus(reverse(v199), tbl[36 + tbloffset], ctimes(v199, tbl[37 + tbloffset])); real2 v207 = ctimesminusplus(reverse(v197), tbl[34 + tbloffset], ctimes(v197, tbl[35 + tbloffset])); real2 v28 = load(in, 26 << inShift); real2 v12 = load(in, 10 << inShift); real2 v81 = plus(v12, v28); real2 v75 = reverse(minus(v12, v28)); real2 v20 = load(in, 18 << inShift); real2 v4 = load(in, 2 << inShift); real2 v80 = plus(v4, v20); real2 v76 = minus(v20, v4); real2 v236 = minus(v81, v80); real2 v240 = plus(v80, v81); real2 v77 = minusplus(v75, v76); real2 v79 = minusplus(uminus(v75), v76); real2 v93 = ctimesminusplus(reverse(v79), tbl[12 + tbloffset], ctimes(v79, tbl[13 + tbloffset])); real2 v32 = load(in, 30 << inShift); real2 v16 = load(in, 14 << inShift); real2 v155 = reverse(minus(v16, v32)); real2 v161 = plus(v16, v32); real2 v24 = load(in, 22 << 
inShift); real2 v8 = load(in, 6 << inShift); real2 v160 = plus(v8, v24); real2 v156 = minus(v24, v8); real2 v235 = reverse(minus(v160, v161)); real2 v241 = plus(v160, v161); real2 v157 = minusplus(v155, v156); real2 v159 = minusplus(uminus(v155), v156); real2 v173 = ctimesminusplus(reverse(v159), tbl[28 + tbloffset], ctimes(v159, tbl[29 + tbloffset])); real2 v485 = reverse(minus(v93, v173)); real2 v491 = plus(v93, v173); real2 v489 = minusplus(uminus(v485), v486); real2 v487 = minusplus(v485, v486); real2 v239 = minusplus(uminus(v235), v236); real2 v237 = minusplus(v235, v236); real2 v253 = ctimesminusplus(reverse(v239), tbl[44 + tbloffset], ctimes(v239, tbl[45 + tbloffset])); real2 v497 = ctimesminusplus(reverse(v487), tbl[82 + tbloffset], ctimes(v487, tbl[83 + tbloffset])); real2 v530 = plus(v490, v491); real2 v526 = minus(v491, v490); real2 v503 = ctimesminusplus(reverse(v489), tbl[84 + tbloffset], ctimes(v489, tbl[85 + tbloffset])); real2 v247 = ctimesminusplus(reverse(v237), tbl[42 + tbloffset], ctimes(v237, tbl[43 + tbloffset])); real2 v356 = minus(v247, v207); real2 v360 = plus(v207, v247); real2 v386 = plus(v213, v253); real2 v382 = minus(v253, v213); real2 v17 = load(in, 15 << inShift); real2 v33 = load(in, 31 << inShift); real2 v175 = reverse(minus(v17, v33)); real2 v181 = plus(v17, v33); real2 v25 = load(in, 23 << inShift); real2 v9 = load(in, 7 << inShift); real2 v176 = minus(v25, v9); real2 v180 = plus(v9, v25); real2 v177 = minusplus(v175, v176); real2 v179 = minusplus(uminus(v175), v176); real2 v193 = ctimesminusplus(reverse(v179), tbl[32 + tbloffset], ctimes(v179, tbl[33 + tbloffset])); real2 v261 = plus(v180, v181); real2 v255 = reverse(minus(v180, v181)); real2 v29 = load(in, 27 << inShift); real2 v13 = load(in, 11 << inShift); real2 v101 = plus(v13, v29); real2 v95 = reverse(minus(v13, v29)); real2 v21 = load(in, 19 << inShift); real2 v5 = load(in, 3 << inShift); real2 v100 = plus(v5, v21); real2 v96 = minus(v21, v5); real2 v99 = 
minusplus(uminus(v95), v96); real2 v97 = minusplus(v95, v96); real2 v260 = plus(v100, v101); real2 v256 = minus(v101, v100); real2 v259 = minusplus(uminus(v255), v256); real2 v257 = minusplus(v255, v256); real2 v273 = ctimesminusplus(reverse(v259), tbl[48 + tbloffset], ctimes(v259, tbl[49 + tbloffset])); real2 v267 = ctimesminusplus(reverse(v257), tbl[46 + tbloffset], ctimes(v257, tbl[47 + tbloffset])); real2 v3 = load(in, 1 << inShift); real2 v19 = load(in, 17 << inShift); real2 v60 = plus(v3, v19); real2 v56 = minus(v19, v3); real2 v27 = load(in, 25 << inShift); real2 v11 = load(in, 9 << inShift); real2 v55 = reverse(minus(v11, v27)); real2 v61 = plus(v11, v27); real2 v220 = plus(v60, v61); real2 v216 = minus(v61, v60); real2 v7 = load(in, 5 << inShift); real2 v23 = load(in, 21 << inShift); real2 v136 = minus(v23, v7); real2 v140 = plus(v7, v23); real2 v15 = load(in, 13 << inShift); real2 v31 = load(in, 29 << inShift); real2 v135 = reverse(minus(v15, v31)); real2 v141 = plus(v15, v31); real2 v215 = reverse(minus(v140, v141)); real2 v221 = plus(v140, v141); real2 v219 = minusplus(uminus(v215), v216); real2 v217 = minusplus(v215, v216); real2 v227 = ctimesminusplus(reverse(v217), tbl[38 + tbloffset], ctimes(v217, tbl[39 + tbloffset])); real2 v355 = reverse(minus(v227, v267)); real2 v361 = plus(v227, v267); store(out, 2 << outShift, plus(v360, v361)); real2 v374 = minus(v360, v361); store(out, 18 << outShift, ctimesminusplus(v374, tbl[0 + tbloffset], ctimes(reverse(v374), tbl[1 + tbloffset]))); real2 v357 = minusplus(v355, v356); store(out, 10 << outShift, ctimesminusplus(reverse(v357), tbl[62 + tbloffset], ctimes(v357, tbl[63 + tbloffset]))); real2 v359 = minusplus(uminus(v355), v356); store(out, 26 << outShift, ctimesminusplus(reverse(v359), tbl[64 + tbloffset], ctimes(v359, tbl[65 + tbloffset]))); real2 v233 = ctimesminusplus(reverse(v219), tbl[40 + tbloffset], ctimes(v219, tbl[41 + tbloffset])); real2 v381 = reverse(minus(v233, v273)); real2 v387 = plus(v233, 
v273); store(out, 6 << outShift, plus(v386, v387)); real2 v400 = minus(v386, v387); store(out, 22 << outShift, ctimesminusplus(v400, tbl[0 + tbloffset], ctimes(reverse(v400), tbl[1 + tbloffset]))); real2 v383 = minusplus(v381, v382); real2 v385 = minusplus(uminus(v381), v382); store(out, 30 << outShift, ctimesminusplus(reverse(v385), tbl[68 + tbloffset], ctimes(v385, tbl[69 + tbloffset]))); store(out, 14 << outShift, ctimesminusplus(reverse(v383), tbl[66 + tbloffset], ctimes(v383, tbl[67 + tbloffset]))); real2 v137 = minusplus(v135, v136); real2 v139 = minusplus(uminus(v135), v136); real2 v153 = ctimesminusplus(reverse(v139), tbl[24 + tbloffset], ctimes(v139, tbl[25 + tbloffset])); real2 v113 = ctimesminusplus(reverse(v99), tbl[16 + tbloffset], ctimes(v99, tbl[17 + tbloffset])); real2 v511 = plus(v113, v193); real2 v505 = reverse(minus(v113, v193)); real2 v57 = minusplus(v55, v56); real2 v59 = minusplus(uminus(v55), v56); real2 v73 = ctimesminusplus(reverse(v59), tbl[8 + tbloffset], ctimes(v59, tbl[9 + tbloffset])); real2 v510 = plus(v73, v153); real2 v506 = minus(v153, v73); real2 v531 = plus(v510, v511); real2 v525 = reverse(minus(v510, v511)); store(out, 3 << outShift, plus(v530, v531)); real2 v544 = minus(v530, v531); store(out, 19 << outShift, ctimesminusplus(v544, tbl[0 + tbloffset], ctimes(reverse(v544), tbl[1 + tbloffset]))); real2 v527 = minusplus(v525, v526); store(out, 11 << outShift, ctimesminusplus(reverse(v527), tbl[90 + tbloffset], ctimes(v527, tbl[91 + tbloffset]))); real2 v529 = minusplus(uminus(v525), v526); store(out, 27 << outShift, ctimesminusplus(reverse(v529), tbl[92 + tbloffset], ctimes(v529, tbl[93 + tbloffset]))); real2 v509 = minusplus(uminus(v505), v506); real2 v507 = minusplus(v505, v506); real2 v523 = ctimesminusplus(reverse(v509), tbl[88 + tbloffset], ctimes(v509, tbl[89 + tbloffset])); store(out, 15 << outShift, plus(v503, v523)); real2 v556 = minus(v503, v523); store(out, 31 << outShift, ctimesminusplus(v556, tbl[0 + tbloffset], 
ctimes(reverse(v556), tbl[1 + tbloffset]))); real2 v517 = ctimesminusplus(reverse(v507), tbl[86 + tbloffset], ctimes(v507, tbl[87 + tbloffset])); store(out, 7 << outShift, plus(v497, v517)); real2 v550 = minus(v497, v517); store(out, 23 << outShift, ctimesminusplus(v550, tbl[0 + tbloffset], ctimes(reverse(v550), tbl[1 + tbloffset]))); real2 v275 = reverse(minus(v240, v241)); real2 v281 = plus(v240, v241); real2 v320 = plus(v280, v281); real2 v316 = minus(v281, v280); real2 v301 = plus(v260, v261); real2 v295 = reverse(minus(v260, v261)); real2 v300 = plus(v220, v221); real2 v296 = minus(v221, v220); real2 v315 = reverse(minus(v300, v301)); real2 v321 = plus(v300, v301); store(out, 0 << outShift, plus(v320, v321)); real2 v334 = minus(v320, v321); store(out, 16 << outShift, ctimesminusplus(v334, tbl[0 + tbloffset], ctimes(reverse(v334), tbl[1 + tbloffset]))); real2 v319 = minusplus(uminus(v315), v316); real2 v317 = minusplus(v315, v316); store(out, 8 << outShift, ctimesminusplus(reverse(v317), tbl[58 + tbloffset], ctimes(v317, tbl[59 + tbloffset]))); store(out, 24 << outShift, ctimesminusplus(reverse(v319), tbl[60 + tbloffset], ctimes(v319, tbl[61 + tbloffset]))); real2 v299 = minusplus(uminus(v295), v296); real2 v297 = minusplus(v295, v296); real2 v279 = minusplus(uminus(v275), v276); real2 v277 = minusplus(v275, v276); real2 v287 = ctimesminusplus(reverse(v277), tbl[50 + tbloffset], ctimes(v277, tbl[51 + tbloffset])); real2 v307 = ctimesminusplus(reverse(v297), tbl[54 + tbloffset], ctimes(v297, tbl[55 + tbloffset])); store(out, 4 << outShift, plus(v287, v307)); real2 v342 = minus(v287, v307); store(out, 20 << outShift, ctimesminusplus(v342, tbl[0 + tbloffset], ctimes(reverse(v342), tbl[1 + tbloffset]))); real2 v313 = ctimesminusplus(reverse(v299), tbl[56 + tbloffset], ctimes(v299, tbl[57 + tbloffset])); real2 v293 = ctimesminusplus(reverse(v279), tbl[52 + tbloffset], ctimes(v279, tbl[53 + tbloffset])); store(out, 12 << outShift, plus(v293, v313)); real2 v348 = 
minus(v293, v313); store(out, 28 << outShift, ctimesminusplus(v348, tbl[0 + tbloffset], ctimes(reverse(v348), tbl[1 + tbloffset]))); real2 v87 = ctimesminusplus(reverse(v77), tbl[10 + tbloffset], ctimes(v77, tbl[11 + tbloffset])); real2 v147 = ctimesminusplus(reverse(v137), tbl[22 + tbloffset], ctimes(v137, tbl[23 + tbloffset])); real2 v187 = ctimesminusplus(reverse(v177), tbl[30 + tbloffset], ctimes(v177, tbl[31 + tbloffset])); real2 v167 = ctimesminusplus(reverse(v157), tbl[26 + tbloffset], ctimes(v157, tbl[27 + tbloffset])); real2 v413 = plus(v87, v167); real2 v407 = reverse(minus(v87, v167)); real2 v67 = ctimesminusplus(reverse(v57), tbl[6 + tbloffset], ctimes(v57, tbl[7 + tbloffset])); real2 v107 = ctimesminusplus(reverse(v97), tbl[14 + tbloffset], ctimes(v97, tbl[15 + tbloffset])); real2 v427 = reverse(minus(v107, v187)); real2 v433 = plus(v107, v187); real2 v432 = plus(v67, v147); real2 v428 = minus(v147, v67); real2 v453 = plus(v432, v433); real2 v447 = reverse(minus(v432, v433)); real2 v408 = minus(v127, v47); real2 v412 = plus(v47, v127); real2 v452 = plus(v412, v413); real2 v448 = minus(v413, v412); store(out, 1 << outShift, plus(v452, v453)); real2 v466 = minus(v452, v453); store(out, 17 << outShift, ctimesminusplus(v466, tbl[0 + tbloffset], ctimes(reverse(v466), tbl[1 + tbloffset]))); real2 v451 = minusplus(uminus(v447), v448); store(out, 25 << outShift, ctimesminusplus(reverse(v451), tbl[80 + tbloffset], ctimes(v451, tbl[81 + tbloffset]))); real2 v449 = minusplus(v447, v448); store(out, 9 << outShift, ctimesminusplus(reverse(v449), tbl[78 + tbloffset], ctimes(v449, tbl[79 + tbloffset]))); real2 v429 = minusplus(v427, v428); real2 v431 = minusplus(uminus(v427), v428); real2 v445 = ctimesminusplus(reverse(v431), tbl[76 + tbloffset], ctimes(v431, tbl[77 + tbloffset])); real2 v409 = minusplus(v407, v408); real2 v411 = minusplus(uminus(v407), v408); real2 v425 = ctimesminusplus(reverse(v411), tbl[72 + tbloffset], ctimes(v411, tbl[73 + tbloffset])); 
store(out, 13 << outShift, plus(v425, v445)); real2 v478 = minus(v425, v445); store(out, 29 << outShift, ctimesminusplus(v478, tbl[0 + tbloffset], ctimes(reverse(v478), tbl[1 + tbloffset]))); real2 v439 = ctimesminusplus(reverse(v429), tbl[74 + tbloffset], ctimes(v429, tbl[75 + tbloffset])); real2 v419 = ctimesminusplus(reverse(v409), tbl[70 + tbloffset], ctimes(v409, tbl[71 + tbloffset])); store(out, 5 << outShift, plus(v419, v439)); real2 v472 = minus(v419, v439); store(out, 21 << outShift, ctimesminusplus(v472, tbl[0 + tbloffset], ctimes(reverse(v472), tbl[1 + tbloffset]))); } }

/*
 * but32b_%CONFIG%_%ISA% -- fully unrolled kernel with 32 inputs and 32 outputs.
 *
 * NOTE(review): the name reads as "radix-32 butterfly, backward transform";
 * this is inferred from the identifier only -- confirm against the generator.
 * %CONFIG% and %ISA% are placeholders, presumably substituted per target when
 * the template is instantiated -- confirm against the build scripts.
 *
 * The function runs k = 1 << (inShift - LOG2VECWIDTH) loop iterations.  In
 * iteration i it
 *   - reads 32 real2 values with load(in, n << inShift), n = 0..31,
 *     where in = in0 + 2 * (i << LOG2VECWIDTH);
 *   - combines them using plus / minus / minusplus / uminus / reverse and
 *     ctimes / ctimesminusplus with entries tbl[c + tbloffset], c in 0..93;
 *   - writes 32 real2 values with store(out, n << outShift, ...), n = 0..31,
 *     where out = out0 + q[i].
 *
 * Parameters:
 *   out0     - base pointer of the output buffer.
 *   q        - table indexed by the iteration number; q[i] is added to out0
 *              to obtain the output base of iteration i.
 *   outShift - shift applied to the output slot number n, and also used to
 *              derive tbloffset from the iteration position.
 *   in0      - base pointer of the input buffer.
 *   inShift  - shift applied to the input slot number n; it also fixes the
 *              iteration count together with LOG2VECWIDTH.
 *   tbl      - table of constants passed to ctimes / ctimesminusplus.
 *   K        - multiplier used when computing tbloffset.
 *
 * The loop carries "#pragma omp parallel for", so iterations may execute
 * concurrently.  NOTE(review): this presumably relies on q[] selecting
 * non-overlapping output regions -- confirm with the code that builds q.
 *
 * The statement list is machine generated (numbered temporaries vNNN); keep
 * the statement order as is.
 */
ALIGNED(8192) void but32b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) {
  /* Iteration count: 2^(inShift - LOG2VECWIDTH). */
  const int k = 1 << (inShift - LOG2VECWIDTH);
  int i;
#pragma omp parallel for
  for(i=0;i < k;i++) {
    /* i0 = i * 2^LOG2VECWIDTH: position of this iteration, in steps of 2^LOG2VECWIDTH. */
    int i0 = i << LOG2VECWIDTH;
    /* Output base for this iteration is taken from the q[] table. */
    real *out = out0 + q[i];
    /* Input base for this iteration lies 2*i0 reals past in0. */
    const real *in = in0 + i0*2;
    /* Base index into tbl; the constant indices below (0..93) are added to it. */
    const int tbloffset = K * (i0 >> outShift);

    /* Straight-line generated body: 32 loads, arithmetic on temporaries, 32 stores. */
    real2 v14 = load(in, 12 << inShift);
    real2 v30 = load(in, 28 << inShift);
    real2 v115 = reverse(minus(v30, v14));
    real2 v121 = plus(v14, v30);
    real2 v6 = load(in, 4 << inShift);
    real2 v22 = load(in, 20 << inShift);
    real2 v120 = plus(v6, v22);
    real2 v116 = minus(v22, v6);
    real2 v201 = plus(v120, v121);
    real2 v195 = reverse(minus(v121, v120));
    real2 v119 = minusplus(uminus(v115), v116);
    real2 v117 = minusplus(v115, v116);
    real2 v133 = ctimesminusplus(reverse(v119), tbl[20 + tbloffset], ctimes(v119, tbl[21 + tbloffset]));
    real2 v127 = ctimesminusplus(reverse(v117), tbl[18 + tbloffset], ctimes(v117, tbl[19 + tbloffset]));
    real2 v18 = load(in, 16 << inShift);
    real2 v2 = load(in, 0 << inShift);
    real2 v40 = plus(v2, v18);
    real2 v36 = minus(v18, v2);
    real2 v10 = load(in, 8 << inShift);
    real2 v26 = load(in, 24 << inShift);
    real2 v41 = plus(v10, v26);
    real2 v35 = reverse(minus(v26, v10));
    real2 v200 = plus(v40, v41);
    real2 v196 = minus(v41, v40);
    real2 v37 = minusplus(v35, v36);
    real2 v39 = minusplus(uminus(v35), v36);
    real2 v53 = ctimesminusplus(reverse(v39), tbl[4 + tbloffset], ctimes(v39, tbl[5 + tbloffset]));
    real2 v276 = minus(v201, v200);
    real2 v280 = plus(v200, v201);
    real2 v47 = ctimesminusplus(reverse(v37), tbl[2 + tbloffset], ctimes(v37, tbl[3 + tbloffset]));
    real2 v199 = minusplus(uminus(v195), v196);
    real2 v197 = minusplus(v195, v196);
    real2 v486 = minus(v133, v53);
    real2 v490 = plus(v53, v133);
    real2 v213 = ctimesminusplus(reverse(v199), tbl[36 + tbloffset], ctimes(v199, tbl[37 + tbloffset]));
    real2 v207 = ctimesminusplus(reverse(v197), tbl[34 + tbloffset], ctimes(v197, tbl[35 + tbloffset]));
    real2 v28 = load(in, 26 << inShift);
    real2 v12 = load(in, 10 << inShift);
    real2 v81 = plus(v12, v28);
    real2 v75 = reverse(minus(v28, v12));
    real2 v20 = load(in, 18 << inShift);
    real2 v4 = load(in, 2 << inShift);
    real2 v80 = plus(v4, v20);
    real2 v76 = minus(v20, v4);
    real2 v236 = minus(v81, v80);
    real2 v240 = plus(v80, v81);
    real2 v77 = minusplus(v75, v76);
    real2 v79 = minusplus(uminus(v75), v76);
    real2 v93 = ctimesminusplus(reverse(v79), tbl[12 + tbloffset], ctimes(v79, tbl[13 + tbloffset]));
    real2 v32 = load(in, 30 << inShift);
    real2 v16 = load(in, 14 << inShift);
    real2 v155 = reverse(minus(v32, v16));
    real2 v161 = plus(v16, v32);
    real2 v24 = load(in, 22 << inShift);
    real2 v8 = load(in, 6 << inShift);
    real2 v160 = plus(v8, v24);
    real2 v156 = minus(v24, v8);
    real2 v235 = reverse(minus(v161, v160));
    real2 v241 = plus(v160, v161);
    real2 v157 = minusplus(v155, v156);
    real2 v159 = minusplus(uminus(v155), v156);
    real2 v173 = ctimesminusplus(reverse(v159), tbl[28 + tbloffset], ctimes(v159, tbl[29 + tbloffset]));
    real2 v485 = reverse(minus(v173, v93));
    real2 v491 = plus(v93, v173);
    real2 v489 = minusplus(uminus(v485), v486);
    real2 v487 = minusplus(v485, v486);
    real2 v239 = minusplus(uminus(v235), v236);
    real2 v237 = minusplus(v235, v236);
    real2 v253 = ctimesminusplus(reverse(v239), tbl[44 + tbloffset], ctimes(v239, tbl[45 + tbloffset]));
    real2 v497 = ctimesminusplus(reverse(v487), tbl[82 + tbloffset], ctimes(v487, tbl[83 + tbloffset]));
    real2 v530 = plus(v490, v491);
    real2 v526 = minus(v491, v490);
    real2 v503 = ctimesminusplus(reverse(v489), tbl[84 + tbloffset], ctimes(v489, tbl[85 + tbloffset]));
    real2 v247 = ctimesminusplus(reverse(v237), tbl[42 + tbloffset], ctimes(v237, tbl[43 + tbloffset]));
    real2 v356 = minus(v247, v207);
    real2 v360 = plus(v207, v247);
    real2 v386 = plus(v213, v253);
    real2 v382 = minus(v253, v213);
    real2 v17 = load(in, 15 << inShift);
    real2 v33 = load(in, 31 << inShift);
    real2 v175 = reverse(minus(v33, v17));
    real2 v181 = plus(v17, v33);
    real2 v25 = load(in, 23 << inShift);
    real2 v9 = load(in, 7 << inShift);
    real2 v176 = minus(v25, v9);
    real2 v180 = plus(v9, v25);
    real2 v177 = minusplus(v175, v176);
    real2 v179 = minusplus(uminus(v175), v176);
    real2 v193 = ctimesminusplus(reverse(v179), tbl[32 + tbloffset], ctimes(v179, tbl[33 + tbloffset]));
    real2 v261 = plus(v180, v181);
    real2 v255 = reverse(minus(v181, v180));
    real2 v29 = load(in, 27 << inShift);
    real2 v13 = load(in, 11 << inShift);
    real2 v101 = plus(v13, v29);
    real2 v95 = reverse(minus(v29, v13));
    real2 v21 = load(in, 19 << inShift);
    real2 v5 = load(in, 3 << inShift);
    real2 v100 = plus(v5, v21);
    real2 v96 = minus(v21, v5);
    real2 v99 = minusplus(uminus(v95), v96);
    real2 v97 = minusplus(v95, v96);
    real2 v260 = plus(v100, v101);
    real2 v256 = minus(v101, v100);
    real2 v259 = minusplus(uminus(v255), v256);
    real2 v257 = minusplus(v255, v256);
    real2 v273 = ctimesminusplus(reverse(v259), tbl[48 + tbloffset], ctimes(v259, tbl[49 + tbloffset]));
    real2 v267 = ctimesminusplus(reverse(v257), tbl[46 + tbloffset], ctimes(v257, tbl[47 + tbloffset]));
    real2 v3 = load(in, 1 << inShift);
    real2 v19 = load(in, 17 << inShift);
    real2 v60 = plus(v3, v19);
    real2 v56 = minus(v19, v3);
    real2 v27 = load(in, 25 << inShift);
    real2 v11 = load(in, 9 << inShift);
    real2 v55 = reverse(minus(v27, v11));
    real2 v61 = plus(v11, v27);
    real2 v220 = plus(v60, v61);
    real2 v216 = minus(v61, v60);
    real2 v7 = load(in, 5 << inShift);
    real2 v23 = load(in, 21 << inShift);
    real2 v136 = minus(v23, v7);
    real2 v140 = plus(v7, v23);
    real2 v15 = load(in, 13 << inShift);
    real2 v31 = load(in, 29 << inShift);
    real2 v135 = reverse(minus(v31, v15));
    real2 v141 = plus(v15, v31);
    real2 v215 = reverse(minus(v141, v140));
    real2 v221 = plus(v140, v141);
    real2 v219 = minusplus(uminus(v215), v216);
    real2 v217 = minusplus(v215, v216);
    real2 v227 = ctimesminusplus(reverse(v217), tbl[38 + tbloffset], ctimes(v217, tbl[39 + tbloffset]));
    real2 v355 = reverse(minus(v267, v227));
    real2 v361 = plus(v227, v267);
    store(out, 2 << outShift, plus(v360, v361));
    real2 v374 = minus(v360, v361);
    store(out, 18 << outShift, ctimesminusplus(v374, tbl[0 + tbloffset], ctimes(reverse(v374), tbl[1 + tbloffset])));
    real2 v357 = minusplus(v355, v356);
    store(out, 10 << outShift, ctimesminusplus(reverse(v357), tbl[62 + tbloffset], ctimes(v357, tbl[63 + tbloffset])));
    real2 v359 = minusplus(uminus(v355), v356);
    store(out, 26 << outShift, ctimesminusplus(reverse(v359), tbl[64 + tbloffset], ctimes(v359, tbl[65 + tbloffset])));
    real2 v233 = ctimesminusplus(reverse(v219), tbl[40 + tbloffset], ctimes(v219, tbl[41 + tbloffset]));
    real2 v381 = reverse(minus(v273, v233));
    real2 v387 = plus(v233, v273);
    store(out, 6 << outShift, plus(v386, v387));
    real2 v400 = minus(v386, v387);
    store(out, 22 << outShift, ctimesminusplus(v400, tbl[0 + tbloffset], ctimes(reverse(v400), tbl[1 + tbloffset])));
    real2 v383 = minusplus(v381, v382);
    real2 v385 = minusplus(uminus(v381), v382);
    store(out, 30 << outShift, ctimesminusplus(reverse(v385), tbl[68 + tbloffset], ctimes(v385, tbl[69 + tbloffset])));
    store(out, 14 << outShift, ctimesminusplus(reverse(v383), tbl[66 + tbloffset], ctimes(v383, tbl[67 + tbloffset])));
    real2 v137 = minusplus(v135, v136);
    real2 v139 = minusplus(uminus(v135), v136);
    real2 v153 = ctimesminusplus(reverse(v139), tbl[24 + tbloffset], ctimes(v139, tbl[25 + tbloffset]));
    real2 v113 = ctimesminusplus(reverse(v99), tbl[16 + tbloffset], ctimes(v99, tbl[17 + tbloffset]));
    real2 v511 = plus(v113, v193);
    real2 v505 = reverse(minus(v193, v113));
    real2 v57 = minusplus(v55, v56);
    real2 v59 = minusplus(uminus(v55), v56);
    real2 v73 = ctimesminusplus(reverse(v59), tbl[8 + tbloffset], ctimes(v59, tbl[9 + tbloffset]));
    real2 v510 = plus(v73, v153);
    real2 v506 = minus(v153, v73);
    real2 v531 = plus(v510, v511);
    real2 v525 = reverse(minus(v511, v510));
    store(out, 3 << outShift, plus(v530, v531));
    real2 v544 = minus(v530, v531);
    store(out, 19 << outShift, ctimesminusplus(v544, tbl[0 + tbloffset], ctimes(reverse(v544), tbl[1 + tbloffset])));
    real2 v527 = minusplus(v525, v526);
    store(out, 11 << outShift, ctimesminusplus(reverse(v527), tbl[90 + tbloffset], ctimes(v527, tbl[91 + tbloffset])));
    real2 v529 = minusplus(uminus(v525), v526);
    store(out, 27 << outShift, ctimesminusplus(reverse(v529), tbl[92 + tbloffset], ctimes(v529, tbl[93 + tbloffset])));
    real2 v509 = minusplus(uminus(v505), v506);
    real2 v507 = minusplus(v505, v506);
    real2 v523 = ctimesminusplus(reverse(v509), tbl[88 + tbloffset], ctimes(v509, tbl[89 + tbloffset]));
    store(out, 15 << outShift, plus(v503, v523));
    real2 v556 = minus(v503, v523);
    store(out, 31 << outShift, ctimesminusplus(v556, tbl[0 + tbloffset], ctimes(reverse(v556), tbl[1 + tbloffset])));
    real2 v517 = ctimesminusplus(reverse(v507), tbl[86 + tbloffset], ctimes(v507, tbl[87 + tbloffset]));
    store(out, 7 << outShift, plus(v497, v517));
    real2 v550 = minus(v497, v517);
    store(out, 23 << outShift, ctimesminusplus(v550, tbl[0 + tbloffset], ctimes(reverse(v550), tbl[1 + tbloffset])));
    real2 v275 = reverse(minus(v241, v240));
    real2 v281 = plus(v240, v241);
    real2 v320 = plus(v280, v281);
    real2 v316 = minus(v281, v280);
    real2 v301 = plus(v260, v261);
    real2 v295 = reverse(minus(v261, v260));
    real2 v300 = plus(v220, v221);
    real2 v296 = minus(v221, v220);
    real2 v315 = reverse(minus(v301, v300));
    real2 v321 = plus(v300, v301);
    store(out, 0 << outShift, plus(v320, v321));
    real2 v334 = minus(v320, v321);
    store(out, 16 << outShift, ctimesminusplus(v334, tbl[0 + tbloffset], ctimes(reverse(v334), tbl[1 + tbloffset])));
    real2 v319 = minusplus(uminus(v315), v316);
    real2 v317 = minusplus(v315, v316);
    store(out, 8 << outShift, ctimesminusplus(reverse(v317), tbl[58 + tbloffset], ctimes(v317, tbl[59 + tbloffset])));
    store(out, 24 << outShift, ctimesminusplus(reverse(v319), tbl[60 + tbloffset], ctimes(v319, tbl[61 + tbloffset])));
    real2 v299 = minusplus(uminus(v295), v296);
    real2 v297 = minusplus(v295, v296);
    real2 v279 = minusplus(uminus(v275), v276);
    real2 v277 = minusplus(v275, v276);
    real2 v287 = ctimesminusplus(reverse(v277), tbl[50 + tbloffset], ctimes(v277, tbl[51 + tbloffset]));
    real2 v307 = ctimesminusplus(reverse(v297), tbl[54 + tbloffset], ctimes(v297, tbl[55 + tbloffset]));
    store(out, 4 << outShift, plus(v287, v307));
    real2 v342 = minus(v287, v307);
    store(out, 20 << outShift, ctimesminusplus(v342, tbl[0 + tbloffset], ctimes(reverse(v342), tbl[1 + tbloffset])));
    real2 v313 = ctimesminusplus(reverse(v299), tbl[56 + tbloffset], ctimes(v299, tbl[57 + tbloffset]));
    real2 v293 = ctimesminusplus(reverse(v279), tbl[52 + tbloffset], ctimes(v279, tbl[53 + tbloffset]));
    store(out, 12 << outShift, plus(v293, v313));
    real2 v348 = minus(v293, v313);
    store(out, 28 << outShift, ctimesminusplus(v348, tbl[0 + tbloffset], ctimes(reverse(v348), tbl[1 + tbloffset])));
    real2 v87 = ctimesminusplus(reverse(v77), tbl[10 + tbloffset], ctimes(v77, tbl[11 + tbloffset]));
    real2 v147 = ctimesminusplus(reverse(v137), tbl[22 + tbloffset], ctimes(v137, tbl[23 + tbloffset]));
    real2 v187 = ctimesminusplus(reverse(v177), tbl[30 + tbloffset], ctimes(v177, tbl[31 + tbloffset]));
    real2 v167 = ctimesminusplus(reverse(v157), tbl[26 + tbloffset], ctimes(v157, tbl[27 + tbloffset]));
    real2 v413 = plus(v87, v167);
    real2 v407 = reverse(minus(v167, v87));
    real2 v67 = ctimesminusplus(reverse(v57), tbl[6 + tbloffset], ctimes(v57, tbl[7 + tbloffset]));
    real2 v107 = ctimesminusplus(reverse(v97), tbl[14 + tbloffset], ctimes(v97, tbl[15 + tbloffset]));
    real2 v427 = reverse(minus(v187, v107));
    real2 v433 = plus(v107, v187);
    real2 v432 = plus(v67, v147);
    real2 v428 = minus(v147, v67);
    real2 v453 = plus(v432, v433);
    real2 v447 = reverse(minus(v433, v432));
    real2 v408 = minus(v127, v47);
    real2 v412 = plus(v47, v127);
    real2 v452 = plus(v412, v413);
    real2 v448 = minus(v413, v412);
    store(out, 1 << outShift, plus(v452, v453));
    real2 v466 = minus(v452, v453);
    store(out, 17 << outShift, ctimesminusplus(v466, tbl[0 + tbloffset], ctimes(reverse(v466), tbl[1 + tbloffset])));
    real2 v451 = minusplus(uminus(v447), v448);
    store(out, 25 << outShift, ctimesminusplus(reverse(v451), tbl[80 + tbloffset], ctimes(v451, tbl[81 + tbloffset])));
    real2 v449 = minusplus(v447, v448);
    store(out, 9 << outShift, ctimesminusplus(reverse(v449), tbl[78 + tbloffset], ctimes(v449, tbl[79 + tbloffset])));
    real2 v429 = minusplus(v427, v428);
    real2 v431 = minusplus(uminus(v427), v428);
    real2 v445 = ctimesminusplus(reverse(v431), tbl[76 + tbloffset], ctimes(v431, tbl[77 + tbloffset]));
    real2 v409 = minusplus(v407, v408);
    real2 v411 = minusplus(uminus(v407), v408);
    real2 v425 = ctimesminusplus(reverse(v411), tbl[72 + tbloffset], ctimes(v411, tbl[73 + tbloffset]));
    store(out, 13 << outShift, plus(v425, v445));
    real2 v478 = minus(v425, v445);
    store(out, 29 << outShift, ctimesminusplus(v478, tbl[0 + tbloffset], ctimes(reverse(v478), tbl[1 + tbloffset])));
    real2 v439 = ctimesminusplus(reverse(v429), tbl[74 + tbloffset], ctimes(v429, tbl[75 + tbloffset]));
    real2 v419 = ctimesminusplus(reverse(v409), tbl[70 + tbloffset], ctimes(v409, tbl[71 + tbloffset]));
    store(out, 5 << outShift, plus(v419, v439));
    real2 v472 = minus(v419, v439);
    store(out, 21 << outShift, ctimesminusplus(v472, tbl[0 + tbloffset], ctimes(reverse(v472), tbl[1 + tbloffset])));
  }
}

ALIGNED(8192) void tbut32f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, 
const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; real2 v14 = load(in, 12 << inShift); real2 v30 = load(in, 28 << inShift); real2 v115 = reverse(minus(v14, v30)); real2 v121 = plus(v14, v30); real2 v6 = load(in, 4 << inShift); real2 v22 = load(in, 20 << inShift); real2 v120 = plus(v6, v22); real2 v116 = minus(v22, v6); real2 v201 = plus(v120, v121); real2 v195 = reverse(minus(v120, v121)); real2 v119 = minusplus(uminus(v115), v116); real2 v117 = minusplus(v115, v116); real2 v133 = timesminusplus(reverse(v119), load(tbl, 20 * VECWIDTH + tbloffset), times(v119, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v127 = timesminusplus(reverse(v117), load(tbl, 18 * VECWIDTH + tbloffset), times(v117, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v18 = load(in, 16 << inShift); real2 v2 = load(in, 0 << inShift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v10 = load(in, 8 << inShift); real2 v26 = load(in, 24 << inShift); real2 v41 = plus(v10, v26); real2 v35 = reverse(minus(v10, v26)); real2 v200 = plus(v40, v41); real2 v196 = minus(v41, v40); real2 v37 = minusplus(v35, v36); real2 v39 = minusplus(uminus(v35), v36); real2 v53 = timesminusplus(reverse(v39), load(tbl, 4 * VECWIDTH + tbloffset), times(v39, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v276 = minus(v201, v200); real2 v280 = plus(v200, v201); real2 v47 = timesminusplus(reverse(v37), load(tbl, 2 * VECWIDTH + tbloffset), times(v37, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v199 = minusplus(uminus(v195), v196); real2 v197 = minusplus(v195, v196); real2 v486 = minus(v133, v53); real2 v490 = plus(v53, v133); real2 v213 = timesminusplus(reverse(v199), load(tbl, 36 * VECWIDTH + tbloffset), times(v199, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v207 = timesminusplus(reverse(v197), load(tbl, 34 * VECWIDTH 
+ tbloffset), times(v197, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v28 = load(in, 26 << inShift); real2 v12 = load(in, 10 << inShift); real2 v81 = plus(v12, v28); real2 v75 = reverse(minus(v12, v28)); real2 v20 = load(in, 18 << inShift); real2 v4 = load(in, 2 << inShift); real2 v80 = plus(v4, v20); real2 v76 = minus(v20, v4); real2 v236 = minus(v81, v80); real2 v240 = plus(v80, v81); real2 v77 = minusplus(v75, v76); real2 v79 = minusplus(uminus(v75), v76); real2 v93 = timesminusplus(reverse(v79), load(tbl, 12 * VECWIDTH + tbloffset), times(v79, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v32 = load(in, 30 << inShift); real2 v16 = load(in, 14 << inShift); real2 v155 = reverse(minus(v16, v32)); real2 v161 = plus(v16, v32); real2 v24 = load(in, 22 << inShift); real2 v8 = load(in, 6 << inShift); real2 v160 = plus(v8, v24); real2 v156 = minus(v24, v8); real2 v235 = reverse(minus(v160, v161)); real2 v241 = plus(v160, v161); real2 v157 = minusplus(v155, v156); real2 v159 = minusplus(uminus(v155), v156); real2 v173 = timesminusplus(reverse(v159), load(tbl, 28 * VECWIDTH + tbloffset), times(v159, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v485 = reverse(minus(v93, v173)); real2 v491 = plus(v93, v173); real2 v489 = minusplus(uminus(v485), v486); real2 v487 = minusplus(v485, v486); real2 v239 = minusplus(uminus(v235), v236); real2 v237 = minusplus(v235, v236); real2 v253 = timesminusplus(reverse(v239), load(tbl, 44 * VECWIDTH + tbloffset), times(v239, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v497 = timesminusplus(reverse(v487), load(tbl, 82 * VECWIDTH + tbloffset), times(v487, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v530 = plus(v490, v491); real2 v526 = minus(v491, v490); real2 v503 = timesminusplus(reverse(v489), load(tbl, 84 * VECWIDTH + tbloffset), times(v489, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v247 = timesminusplus(reverse(v237), load(tbl, 42 * VECWIDTH + tbloffset), times(v237, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v356 = 
minus(v247, v207); real2 v360 = plus(v207, v247); real2 v386 = plus(v213, v253); real2 v382 = minus(v253, v213); real2 v17 = load(in, 15 << inShift); real2 v33 = load(in, 31 << inShift); real2 v175 = reverse(minus(v17, v33)); real2 v181 = plus(v17, v33); real2 v25 = load(in, 23 << inShift); real2 v9 = load(in, 7 << inShift); real2 v176 = minus(v25, v9); real2 v180 = plus(v9, v25); real2 v177 = minusplus(v175, v176); real2 v179 = minusplus(uminus(v175), v176); real2 v193 = timesminusplus(reverse(v179), load(tbl, 32 * VECWIDTH + tbloffset), times(v179, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v261 = plus(v180, v181); real2 v255 = reverse(minus(v180, v181)); real2 v29 = load(in, 27 << inShift); real2 v13 = load(in, 11 << inShift); real2 v101 = plus(v13, v29); real2 v95 = reverse(minus(v13, v29)); real2 v21 = load(in, 19 << inShift); real2 v5 = load(in, 3 << inShift); real2 v100 = plus(v5, v21); real2 v96 = minus(v21, v5); real2 v99 = minusplus(uminus(v95), v96); real2 v97 = minusplus(v95, v96); real2 v260 = plus(v100, v101); real2 v256 = minus(v101, v100); real2 v259 = minusplus(uminus(v255), v256); real2 v257 = minusplus(v255, v256); real2 v273 = timesminusplus(reverse(v259), load(tbl, 48 * VECWIDTH + tbloffset), times(v259, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v267 = timesminusplus(reverse(v257), load(tbl, 46 * VECWIDTH + tbloffset), times(v257, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v3 = load(in, 1 << inShift); real2 v19 = load(in, 17 << inShift); real2 v60 = plus(v3, v19); real2 v56 = minus(v19, v3); real2 v27 = load(in, 25 << inShift); real2 v11 = load(in, 9 << inShift); real2 v55 = reverse(minus(v11, v27)); real2 v61 = plus(v11, v27); real2 v220 = plus(v60, v61); real2 v216 = minus(v61, v60); real2 v7 = load(in, 5 << inShift); real2 v23 = load(in, 21 << inShift); real2 v136 = minus(v23, v7); real2 v140 = plus(v7, v23); real2 v15 = load(in, 13 << inShift); real2 v31 = load(in, 29 << inShift); real2 v135 = reverse(minus(v15, v31)); real2 v141 
= plus(v15, v31); real2 v215 = reverse(minus(v140, v141)); real2 v221 = plus(v140, v141); real2 v219 = minusplus(uminus(v215), v216); real2 v217 = minusplus(v215, v216); real2 v227 = timesminusplus(reverse(v217), load(tbl, 38 * VECWIDTH + tbloffset), times(v217, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v355 = reverse(minus(v227, v267)); real2 v361 = plus(v227, v267); scatter(out, 2, 32, plus(v360, v361)); real2 v374 = minus(v360, v361); scatter(out, 18, 32, timesminusplus(v374, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v374), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v357 = minusplus(v355, v356); scatter(out, 10, 32, timesminusplus(reverse(v357), load(tbl, 62 * VECWIDTH + tbloffset), times(v357, load(tbl, 63 * VECWIDTH + tbloffset)))); real2 v359 = minusplus(uminus(v355), v356); scatter(out, 26, 32, timesminusplus(reverse(v359), load(tbl, 64 * VECWIDTH + tbloffset), times(v359, load(tbl, 65 * VECWIDTH + tbloffset)))); real2 v233 = timesminusplus(reverse(v219), load(tbl, 40 * VECWIDTH + tbloffset), times(v219, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v381 = reverse(minus(v233, v273)); real2 v387 = plus(v233, v273); scatter(out, 6, 32, plus(v386, v387)); real2 v400 = minus(v386, v387); scatter(out, 22, 32, timesminusplus(v400, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v400), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v383 = minusplus(v381, v382); real2 v385 = minusplus(uminus(v381), v382); scatter(out, 30, 32, timesminusplus(reverse(v385), load(tbl, 68 * VECWIDTH + tbloffset), times(v385, load(tbl, 69 * VECWIDTH + tbloffset)))); scatter(out, 14, 32, timesminusplus(reverse(v383), load(tbl, 66 * VECWIDTH + tbloffset), times(v383, load(tbl, 67 * VECWIDTH + tbloffset)))); real2 v137 = minusplus(v135, v136); real2 v139 = minusplus(uminus(v135), v136); real2 v153 = timesminusplus(reverse(v139), load(tbl, 24 * VECWIDTH + tbloffset), times(v139, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v113 = timesminusplus(reverse(v99), load(tbl, 16 * 
VECWIDTH + tbloffset), times(v99, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v511 = plus(v113, v193); real2 v505 = reverse(minus(v113, v193)); real2 v57 = minusplus(v55, v56); real2 v59 = minusplus(uminus(v55), v56); real2 v73 = timesminusplus(reverse(v59), load(tbl, 8 * VECWIDTH + tbloffset), times(v59, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v510 = plus(v73, v153); real2 v506 = minus(v153, v73); real2 v531 = plus(v510, v511); real2 v525 = reverse(minus(v510, v511)); scatter(out, 3, 32, plus(v530, v531)); real2 v544 = minus(v530, v531); scatter(out, 19, 32, timesminusplus(v544, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v544), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v527 = minusplus(v525, v526); scatter(out, 11, 32, timesminusplus(reverse(v527), load(tbl, 90 * VECWIDTH + tbloffset), times(v527, load(tbl, 91 * VECWIDTH + tbloffset)))); real2 v529 = minusplus(uminus(v525), v526); scatter(out, 27, 32, timesminusplus(reverse(v529), load(tbl, 92 * VECWIDTH + tbloffset), times(v529, load(tbl, 93 * VECWIDTH + tbloffset)))); real2 v509 = minusplus(uminus(v505), v506); real2 v507 = minusplus(v505, v506); real2 v523 = timesminusplus(reverse(v509), load(tbl, 88 * VECWIDTH + tbloffset), times(v509, load(tbl, 89 * VECWIDTH + tbloffset))); scatter(out, 15, 32, plus(v503, v523)); real2 v556 = minus(v503, v523); scatter(out, 31, 32, timesminusplus(v556, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v556), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v517 = timesminusplus(reverse(v507), load(tbl, 86 * VECWIDTH + tbloffset), times(v507, load(tbl, 87 * VECWIDTH + tbloffset))); scatter(out, 7, 32, plus(v497, v517)); real2 v550 = minus(v497, v517); scatter(out, 23, 32, timesminusplus(v550, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v550), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v275 = reverse(minus(v240, v241)); real2 v281 = plus(v240, v241); real2 v320 = plus(v280, v281); real2 v316 = minus(v281, v280); real2 v301 = plus(v260, v261); real2 
v295 = reverse(minus(v260, v261)); real2 v300 = plus(v220, v221); real2 v296 = minus(v221, v220); real2 v315 = reverse(minus(v300, v301)); real2 v321 = plus(v300, v301); scatter(out, 0, 32, plus(v320, v321)); real2 v334 = minus(v320, v321); scatter(out, 16, 32, timesminusplus(v334, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v334), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v319 = minusplus(uminus(v315), v316); real2 v317 = minusplus(v315, v316); scatter(out, 8, 32, timesminusplus(reverse(v317), load(tbl, 58 * VECWIDTH + tbloffset), times(v317, load(tbl, 59 * VECWIDTH + tbloffset)))); scatter(out, 24, 32, timesminusplus(reverse(v319), load(tbl, 60 * VECWIDTH + tbloffset), times(v319, load(tbl, 61 * VECWIDTH + tbloffset)))); real2 v299 = minusplus(uminus(v295), v296); real2 v297 = minusplus(v295, v296); real2 v279 = minusplus(uminus(v275), v276); real2 v277 = minusplus(v275, v276); real2 v287 = timesminusplus(reverse(v277), load(tbl, 50 * VECWIDTH + tbloffset), times(v277, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v307 = timesminusplus(reverse(v297), load(tbl, 54 * VECWIDTH + tbloffset), times(v297, load(tbl, 55 * VECWIDTH + tbloffset))); scatter(out, 4, 32, plus(v287, v307)); real2 v342 = minus(v287, v307); scatter(out, 20, 32, timesminusplus(v342, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v342), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v313 = timesminusplus(reverse(v299), load(tbl, 56 * VECWIDTH + tbloffset), times(v299, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v293 = timesminusplus(reverse(v279), load(tbl, 52 * VECWIDTH + tbloffset), times(v279, load(tbl, 53 * VECWIDTH + tbloffset))); scatter(out, 12, 32, plus(v293, v313)); real2 v348 = minus(v293, v313); scatter(out, 28, 32, timesminusplus(v348, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v348), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v87 = timesminusplus(reverse(v77), load(tbl, 10 * VECWIDTH + tbloffset), times(v77, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v147 
= timesminusplus(reverse(v137), load(tbl, 22 * VECWIDTH + tbloffset), times(v137, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v187 = timesminusplus(reverse(v177), load(tbl, 30 * VECWIDTH + tbloffset), times(v177, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v167 = timesminusplus(reverse(v157), load(tbl, 26 * VECWIDTH + tbloffset), times(v157, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v413 = plus(v87, v167); real2 v407 = reverse(minus(v87, v167)); real2 v67 = timesminusplus(reverse(v57), load(tbl, 6 * VECWIDTH + tbloffset), times(v57, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v107 = timesminusplus(reverse(v97), load(tbl, 14 * VECWIDTH + tbloffset), times(v97, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v427 = reverse(minus(v107, v187)); real2 v433 = plus(v107, v187); real2 v432 = plus(v67, v147); real2 v428 = minus(v147, v67); real2 v453 = plus(v432, v433); real2 v447 = reverse(minus(v432, v433)); real2 v408 = minus(v127, v47); real2 v412 = plus(v47, v127); real2 v452 = plus(v412, v413); real2 v448 = minus(v413, v412); scatter(out, 1, 32, plus(v452, v453)); real2 v466 = minus(v452, v453); scatter(out, 17, 32, timesminusplus(v466, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v466), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v451 = minusplus(uminus(v447), v448); scatter(out, 25, 32, timesminusplus(reverse(v451), load(tbl, 80 * VECWIDTH + tbloffset), times(v451, load(tbl, 81 * VECWIDTH + tbloffset)))); real2 v449 = minusplus(v447, v448); scatter(out, 9, 32, timesminusplus(reverse(v449), load(tbl, 78 * VECWIDTH + tbloffset), times(v449, load(tbl, 79 * VECWIDTH + tbloffset)))); real2 v429 = minusplus(v427, v428); real2 v431 = minusplus(uminus(v427), v428); real2 v445 = timesminusplus(reverse(v431), load(tbl, 76 * VECWIDTH + tbloffset), times(v431, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v409 = minusplus(v407, v408); real2 v411 = minusplus(uminus(v407), v408); real2 v425 = timesminusplus(reverse(v411), load(tbl, 72 * VECWIDTH + tbloffset), 
times(v411, load(tbl, 73 * VECWIDTH + tbloffset))); scatter(out, 13, 32, plus(v425, v445)); real2 v478 = minus(v425, v445); scatter(out, 29, 32, timesminusplus(v478, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v478), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v439 = timesminusplus(reverse(v429), load(tbl, 74 * VECWIDTH + tbloffset), times(v429, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v419 = timesminusplus(reverse(v409), load(tbl, 70 * VECWIDTH + tbloffset), times(v409, load(tbl, 71 * VECWIDTH + tbloffset))); scatter(out, 5, 32, plus(v419, v439)); real2 v472 = minus(v419, v439); scatter(out, 21, 32, timesminusplus(v472, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v472), load(tbl, 1 * VECWIDTH + tbloffset)))); } }
/*
 * tbut32b: fully unrolled 32-input / 32-output butterfly step that multiplies
 * intermediate values by entries of a caller-supplied table and scatters the
 * results (the "b" variant, counterpart of tbut32f).
 *
 * Parameters:
 *   out0    - base of the output buffer; iteration i writes through
 *             out = out0 + q[i].
 *   q       - array of output offsets, indexed by the loop counter i.
 *   in0     - base of the input buffer; iteration i reads through
 *             in = in0 + i0 * 2, where i0 = i << LOG2VECWIDTH.
 *             (The factor 2 presumably covers interleaved real/imaginary
 *             parts -- TODO confirm.)
 *   inShift - left shift applied to each input index: input n is fetched with
 *             load(in, n << inShift) for n = 0..31. It also sets the trip
 *             count, k = 1 << (inShift - LOG2VECWIDTH).
 *   tbl     - table of multiplier values, read as
 *             load(tbl, j * VECWIDTH + tbloffset) with j ranging up to 93.
 *   K       - per-iteration table stride: tbloffset = K * i0.
 *
 * Each loop iteration loads 32 real2 values, combines them with
 * plus / minus / reverse / minusplus / uminus, applies table entries through
 * timesminusplus(..., times(...)), and emits exactly 32 results with
 * scatter(out, n, 32, ...) for n = 0..31. Iterations are distributed with
 * "#pragma omp parallel for".
 *
 * Relative to tbut32f, the operands of every minus() nested inside reverse()
 * are swapped (e.g. reverse(minus(v30, v14)) here versus
 * reverse(minus(v14, v30)) there); the rest appears identical (spot-checked,
 * not exhaustively diffed). Presumably "b" is the backward transform --
 * verify against the caller.
 *
 * NOTE(review): looks like generated template code (numbered temporaries,
 * CONFIG / ISA markers substituted into the name); prefer regenerating over
 * hand-editing -- confirm. ALIGNED(8192) is a project macro defined elsewhere.
 */
ALIGNED(8192) void tbut32b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; real2 v14 = load(in, 12 << inShift); real2 v30 = load(in, 28 << inShift); real2 v115 = reverse(minus(v30, v14)); real2 v121 = plus(v14, v30); real2 v6 = load(in, 4 << inShift); real2 v22 = load(in, 20 << inShift); real2 v120 = plus(v6, v22); real2 v116 = minus(v22, v6); real2 v201 = plus(v120, v121); real2 v195 = reverse(minus(v121, v120)); real2 v119 = minusplus(uminus(v115), v116); real2 v117 = minusplus(v115, v116); real2 v133 = timesminusplus(reverse(v119), load(tbl, 20 * VECWIDTH + tbloffset), times(v119, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v127 = timesminusplus(reverse(v117), load(tbl, 18 * VECWIDTH + tbloffset), times(v117, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v18 = load(in, 16 << inShift); real2 v2 = load(in, 0 << inShift); real2 v40 = plus(v2, v18); real2 v36 = minus(v18, v2); real2 v10 = load(in, 8 << inShift); real2 v26 = load(in, 24 <<
inShift); real2 v41 = plus(v10, v26); real2 v35 = reverse(minus(v26, v10)); real2 v200 = plus(v40, v41); real2 v196 = minus(v41, v40); real2 v37 = minusplus(v35, v36); real2 v39 = minusplus(uminus(v35), v36); real2 v53 = timesminusplus(reverse(v39), load(tbl, 4 * VECWIDTH + tbloffset), times(v39, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v276 = minus(v201, v200); real2 v280 = plus(v200, v201); real2 v47 = timesminusplus(reverse(v37), load(tbl, 2 * VECWIDTH + tbloffset), times(v37, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v199 = minusplus(uminus(v195), v196); real2 v197 = minusplus(v195, v196); real2 v486 = minus(v133, v53); real2 v490 = plus(v53, v133); real2 v213 = timesminusplus(reverse(v199), load(tbl, 36 * VECWIDTH + tbloffset), times(v199, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v207 = timesminusplus(reverse(v197), load(tbl, 34 * VECWIDTH + tbloffset), times(v197, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v28 = load(in, 26 << inShift); real2 v12 = load(in, 10 << inShift); real2 v81 = plus(v12, v28); real2 v75 = reverse(minus(v28, v12)); real2 v20 = load(in, 18 << inShift); real2 v4 = load(in, 2 << inShift); real2 v80 = plus(v4, v20); real2 v76 = minus(v20, v4); real2 v236 = minus(v81, v80); real2 v240 = plus(v80, v81); real2 v77 = minusplus(v75, v76); real2 v79 = minusplus(uminus(v75), v76); real2 v93 = timesminusplus(reverse(v79), load(tbl, 12 * VECWIDTH + tbloffset), times(v79, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v32 = load(in, 30 << inShift); real2 v16 = load(in, 14 << inShift); real2 v155 = reverse(minus(v32, v16)); real2 v161 = plus(v16, v32); real2 v24 = load(in, 22 << inShift); real2 v8 = load(in, 6 << inShift); real2 v160 = plus(v8, v24); real2 v156 = minus(v24, v8); real2 v235 = reverse(minus(v161, v160)); real2 v241 = plus(v160, v161); real2 v157 = minusplus(v155, v156); real2 v159 = minusplus(uminus(v155), v156); real2 v173 = timesminusplus(reverse(v159), load(tbl, 28 * VECWIDTH + tbloffset), times(v159, load(tbl, 29 * 
VECWIDTH + tbloffset))); real2 v485 = reverse(minus(v173, v93)); real2 v491 = plus(v93, v173); real2 v489 = minusplus(uminus(v485), v486); real2 v487 = minusplus(v485, v486); real2 v239 = minusplus(uminus(v235), v236); real2 v237 = minusplus(v235, v236); real2 v253 = timesminusplus(reverse(v239), load(tbl, 44 * VECWIDTH + tbloffset), times(v239, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v497 = timesminusplus(reverse(v487), load(tbl, 82 * VECWIDTH + tbloffset), times(v487, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v530 = plus(v490, v491); real2 v526 = minus(v491, v490); real2 v503 = timesminusplus(reverse(v489), load(tbl, 84 * VECWIDTH + tbloffset), times(v489, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v247 = timesminusplus(reverse(v237), load(tbl, 42 * VECWIDTH + tbloffset), times(v237, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v356 = minus(v247, v207); real2 v360 = plus(v207, v247); real2 v386 = plus(v213, v253); real2 v382 = minus(v253, v213); real2 v17 = load(in, 15 << inShift); real2 v33 = load(in, 31 << inShift); real2 v175 = reverse(minus(v33, v17)); real2 v181 = plus(v17, v33); real2 v25 = load(in, 23 << inShift); real2 v9 = load(in, 7 << inShift); real2 v176 = minus(v25, v9); real2 v180 = plus(v9, v25); real2 v177 = minusplus(v175, v176); real2 v179 = minusplus(uminus(v175), v176); real2 v193 = timesminusplus(reverse(v179), load(tbl, 32 * VECWIDTH + tbloffset), times(v179, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v261 = plus(v180, v181); real2 v255 = reverse(minus(v181, v180)); real2 v29 = load(in, 27 << inShift); real2 v13 = load(in, 11 << inShift); real2 v101 = plus(v13, v29); real2 v95 = reverse(minus(v29, v13)); real2 v21 = load(in, 19 << inShift); real2 v5 = load(in, 3 << inShift); real2 v100 = plus(v5, v21); real2 v96 = minus(v21, v5); real2 v99 = minusplus(uminus(v95), v96); real2 v97 = minusplus(v95, v96); real2 v260 = plus(v100, v101); real2 v256 = minus(v101, v100); real2 v259 = minusplus(uminus(v255), v256); real2 v257 = 
minusplus(v255, v256); real2 v273 = timesminusplus(reverse(v259), load(tbl, 48 * VECWIDTH + tbloffset), times(v259, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v267 = timesminusplus(reverse(v257), load(tbl, 46 * VECWIDTH + tbloffset), times(v257, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v3 = load(in, 1 << inShift); real2 v19 = load(in, 17 << inShift); real2 v60 = plus(v3, v19); real2 v56 = minus(v19, v3); real2 v27 = load(in, 25 << inShift); real2 v11 = load(in, 9 << inShift); real2 v55 = reverse(minus(v27, v11)); real2 v61 = plus(v11, v27); real2 v220 = plus(v60, v61); real2 v216 = minus(v61, v60); real2 v7 = load(in, 5 << inShift); real2 v23 = load(in, 21 << inShift); real2 v136 = minus(v23, v7); real2 v140 = plus(v7, v23); real2 v15 = load(in, 13 << inShift); real2 v31 = load(in, 29 << inShift); real2 v135 = reverse(minus(v31, v15)); real2 v141 = plus(v15, v31); real2 v215 = reverse(minus(v141, v140)); real2 v221 = plus(v140, v141); real2 v219 = minusplus(uminus(v215), v216); real2 v217 = minusplus(v215, v216); real2 v227 = timesminusplus(reverse(v217), load(tbl, 38 * VECWIDTH + tbloffset), times(v217, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v355 = reverse(minus(v267, v227)); real2 v361 = plus(v227, v267); scatter(out, 2, 32, plus(v360, v361)); real2 v374 = minus(v360, v361); scatter(out, 18, 32, timesminusplus(v374, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v374), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v357 = minusplus(v355, v356); scatter(out, 10, 32, timesminusplus(reverse(v357), load(tbl, 62 * VECWIDTH + tbloffset), times(v357, load(tbl, 63 * VECWIDTH + tbloffset)))); real2 v359 = minusplus(uminus(v355), v356); scatter(out, 26, 32, timesminusplus(reverse(v359), load(tbl, 64 * VECWIDTH + tbloffset), times(v359, load(tbl, 65 * VECWIDTH + tbloffset)))); real2 v233 = timesminusplus(reverse(v219), load(tbl, 40 * VECWIDTH + tbloffset), times(v219, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v381 = reverse(minus(v273, v233)); real2 
v387 = plus(v233, v273); scatter(out, 6, 32, plus(v386, v387)); real2 v400 = minus(v386, v387); scatter(out, 22, 32, timesminusplus(v400, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v400), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v383 = minusplus(v381, v382); real2 v385 = minusplus(uminus(v381), v382); scatter(out, 30, 32, timesminusplus(reverse(v385), load(tbl, 68 * VECWIDTH + tbloffset), times(v385, load(tbl, 69 * VECWIDTH + tbloffset)))); scatter(out, 14, 32, timesminusplus(reverse(v383), load(tbl, 66 * VECWIDTH + tbloffset), times(v383, load(tbl, 67 * VECWIDTH + tbloffset)))); real2 v137 = minusplus(v135, v136); real2 v139 = minusplus(uminus(v135), v136); real2 v153 = timesminusplus(reverse(v139), load(tbl, 24 * VECWIDTH + tbloffset), times(v139, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v113 = timesminusplus(reverse(v99), load(tbl, 16 * VECWIDTH + tbloffset), times(v99, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v511 = plus(v113, v193); real2 v505 = reverse(minus(v193, v113)); real2 v57 = minusplus(v55, v56); real2 v59 = minusplus(uminus(v55), v56); real2 v73 = timesminusplus(reverse(v59), load(tbl, 8 * VECWIDTH + tbloffset), times(v59, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v510 = plus(v73, v153); real2 v506 = minus(v153, v73); real2 v531 = plus(v510, v511); real2 v525 = reverse(minus(v511, v510)); scatter(out, 3, 32, plus(v530, v531)); real2 v544 = minus(v530, v531); scatter(out, 19, 32, timesminusplus(v544, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v544), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v527 = minusplus(v525, v526); scatter(out, 11, 32, timesminusplus(reverse(v527), load(tbl, 90 * VECWIDTH + tbloffset), times(v527, load(tbl, 91 * VECWIDTH + tbloffset)))); real2 v529 = minusplus(uminus(v525), v526); scatter(out, 27, 32, timesminusplus(reverse(v529), load(tbl, 92 * VECWIDTH + tbloffset), times(v529, load(tbl, 93 * VECWIDTH + tbloffset)))); real2 v509 = minusplus(uminus(v505), v506); real2 v507 = minusplus(v505, 
v506); real2 v523 = timesminusplus(reverse(v509), load(tbl, 88 * VECWIDTH + tbloffset), times(v509, load(tbl, 89 * VECWIDTH + tbloffset))); scatter(out, 15, 32, plus(v503, v523)); real2 v556 = minus(v503, v523); scatter(out, 31, 32, timesminusplus(v556, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v556), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v517 = timesminusplus(reverse(v507), load(tbl, 86 * VECWIDTH + tbloffset), times(v507, load(tbl, 87 * VECWIDTH + tbloffset))); scatter(out, 7, 32, plus(v497, v517)); real2 v550 = minus(v497, v517); scatter(out, 23, 32, timesminusplus(v550, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v550), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v275 = reverse(minus(v241, v240)); real2 v281 = plus(v240, v241); real2 v320 = plus(v280, v281); real2 v316 = minus(v281, v280); real2 v301 = plus(v260, v261); real2 v295 = reverse(minus(v261, v260)); real2 v300 = plus(v220, v221); real2 v296 = minus(v221, v220); real2 v315 = reverse(minus(v301, v300)); real2 v321 = plus(v300, v301); scatter(out, 0, 32, plus(v320, v321)); real2 v334 = minus(v320, v321); scatter(out, 16, 32, timesminusplus(v334, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v334), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v319 = minusplus(uminus(v315), v316); real2 v317 = minusplus(v315, v316); scatter(out, 8, 32, timesminusplus(reverse(v317), load(tbl, 58 * VECWIDTH + tbloffset), times(v317, load(tbl, 59 * VECWIDTH + tbloffset)))); scatter(out, 24, 32, timesminusplus(reverse(v319), load(tbl, 60 * VECWIDTH + tbloffset), times(v319, load(tbl, 61 * VECWIDTH + tbloffset)))); real2 v299 = minusplus(uminus(v295), v296); real2 v297 = minusplus(v295, v296); real2 v279 = minusplus(uminus(v275), v276); real2 v277 = minusplus(v275, v276); real2 v287 = timesminusplus(reverse(v277), load(tbl, 50 * VECWIDTH + tbloffset), times(v277, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v307 = timesminusplus(reverse(v297), load(tbl, 54 * VECWIDTH + tbloffset), times(v297, 
load(tbl, 55 * VECWIDTH + tbloffset))); scatter(out, 4, 32, plus(v287, v307)); real2 v342 = minus(v287, v307); scatter(out, 20, 32, timesminusplus(v342, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v342), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v313 = timesminusplus(reverse(v299), load(tbl, 56 * VECWIDTH + tbloffset), times(v299, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v293 = timesminusplus(reverse(v279), load(tbl, 52 * VECWIDTH + tbloffset), times(v279, load(tbl, 53 * VECWIDTH + tbloffset))); scatter(out, 12, 32, plus(v293, v313)); real2 v348 = minus(v293, v313); scatter(out, 28, 32, timesminusplus(v348, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v348), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v87 = timesminusplus(reverse(v77), load(tbl, 10 * VECWIDTH + tbloffset), times(v77, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v147 = timesminusplus(reverse(v137), load(tbl, 22 * VECWIDTH + tbloffset), times(v137, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v187 = timesminusplus(reverse(v177), load(tbl, 30 * VECWIDTH + tbloffset), times(v177, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v167 = timesminusplus(reverse(v157), load(tbl, 26 * VECWIDTH + tbloffset), times(v157, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v413 = plus(v87, v167); real2 v407 = reverse(minus(v167, v87)); real2 v67 = timesminusplus(reverse(v57), load(tbl, 6 * VECWIDTH + tbloffset), times(v57, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v107 = timesminusplus(reverse(v97), load(tbl, 14 * VECWIDTH + tbloffset), times(v97, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v427 = reverse(minus(v187, v107)); real2 v433 = plus(v107, v187); real2 v432 = plus(v67, v147); real2 v428 = minus(v147, v67); real2 v453 = plus(v432, v433); real2 v447 = reverse(minus(v433, v432)); real2 v408 = minus(v127, v47); real2 v412 = plus(v47, v127); real2 v452 = plus(v412, v413); real2 v448 = minus(v413, v412); scatter(out, 1, 32, plus(v452, v453)); real2 v466 = minus(v452, v453); scatter(out, 
17, 32, timesminusplus(v466, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v466), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v451 = minusplus(uminus(v447), v448); scatter(out, 25, 32, timesminusplus(reverse(v451), load(tbl, 80 * VECWIDTH + tbloffset), times(v451, load(tbl, 81 * VECWIDTH + tbloffset)))); real2 v449 = minusplus(v447, v448); scatter(out, 9, 32, timesminusplus(reverse(v449), load(tbl, 78 * VECWIDTH + tbloffset), times(v449, load(tbl, 79 * VECWIDTH + tbloffset)))); real2 v429 = minusplus(v427, v428); real2 v431 = minusplus(uminus(v427), v428); real2 v445 = timesminusplus(reverse(v431), load(tbl, 76 * VECWIDTH + tbloffset), times(v431, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v409 = minusplus(v407, v408); real2 v411 = minusplus(uminus(v407), v408); real2 v425 = timesminusplus(reverse(v411), load(tbl, 72 * VECWIDTH + tbloffset), times(v411, load(tbl, 73 * VECWIDTH + tbloffset))); scatter(out, 13, 32, plus(v425, v445)); real2 v478 = minus(v425, v445); scatter(out, 29, 32, timesminusplus(v478, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v478), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v439 = timesminusplus(reverse(v429), load(tbl, 74 * VECWIDTH + tbloffset), times(v429, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v419 = timesminusplus(reverse(v409), load(tbl, 70 * VECWIDTH + tbloffset), times(v409, load(tbl, 71 * VECWIDTH + tbloffset))); scatter(out, 5, 32, plus(v419, v439)); real2 v472 = minus(v419, v439); scatter(out, 21, 32, timesminusplus(v472, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v472), load(tbl, 1 * VECWIDTH + tbloffset)))); } } #endif #if MAXBUTWIDTH >= 6 ALIGNED(8192) void dft64f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; // Pres : 27834 real2 v13 = load(in, 11 << shift); real2 v45 = load(in, 43 << 
shift); real2 v268 = plus(v13, v45); real2 v264 = minus(v45, v13); real2 v61 = load(in, 59 << shift); real2 v29 = load(in, 27 << shift); real2 v269 = plus(v29, v61); real2 v263 = reverse(minus(v29, v61)); real2 v401 = reverse(minus(v268, v269)); real2 v407 = plus(v268, v269); real2 v267 = minusplus(uminus(v263), v264); real2 v265 = minusplus(v263, v264); real2 v279 = ctimesminusplus(reverse(v267), ctbl[28], ctimes(v267, ctbl[14])); real2 v273 = ctimesminusplus(reverse(v265), ctbl[25], ctimes(v265, ctbl[19])); real2 v5 = load(in, 3 << shift); real2 v37 = load(in, 35 << shift); real2 v124 = minus(v37, v5); real2 v128 = plus(v5, v37); real2 v21 = load(in, 19 << shift); real2 v53 = load(in, 51 << shift); real2 v129 = plus(v21, v53); real2 v123 = reverse(minus(v21, v53)); real2 v402 = minus(v129, v128); real2 v406 = plus(v128, v129); real2 v405 = minusplus(uminus(v401), v402); real2 v403 = minusplus(v401, v402); real2 v415 = ctimesminusplus(reverse(v405), ctbl[13], ctimes(v405, ctbl[12])); real2 v411 = ctimesminusplus(reverse(v403), ctbl[11], ctimes(v403, ctbl[9])); real2 v125 = minusplus(v123, v124); real2 v127 = minusplus(uminus(v123), v124); real2 v139 = ctimesminusplus(reverse(v127), ctbl[17], ctimes(v127, ctbl[27])); real2 v534 = plus(v406, v407); real2 v530 = minus(v407, v406); real2 v962 = minus(v279, v139); real2 v966 = plus(v139, v279); real2 v133 = ctimesminusplus(reverse(v125), ctbl[23], ctimes(v125, ctbl[21])); real2 v807 = plus(v133, v273); real2 v803 = minus(v273, v133); real2 v65 = load(in, 63 << shift); real2 v33 = load(in, 31 << shift); real2 v335 = reverse(minus(v33, v65)); real2 v341 = plus(v33, v65); real2 v49 = load(in, 47 << shift); real2 v17 = load(in, 15 << shift); real2 v340 = plus(v17, v49); real2 v336 = minus(v49, v17); real2 v471 = plus(v340, v341); real2 v465 = reverse(minus(v340, v341)); real2 v339 = minusplus(uminus(v335), v336); real2 v337 = minusplus(v335, v336); real2 v351 = ctimesminusplus(reverse(v339), ctbl[22], ctimes(v339, 
ctbl[20])); real2 v345 = ctimesminusplus(reverse(v337), ctbl[29], ctimes(v337, ctbl[15])); real2 v9 = load(in, 7 << shift); real2 v41 = load(in, 39 << shift); real2 v199 = plus(v9, v41); real2 v195 = minus(v41, v9); real2 v57 = load(in, 55 << shift); real2 v25 = load(in, 23 << shift); real2 v200 = plus(v25, v57); real2 v194 = reverse(minus(v25, v57)); real2 v466 = minus(v200, v199); real2 v470 = plus(v199, v200); real2 v535 = plus(v470, v471); real2 v529 = reverse(minus(v470, v471)); real2 v469 = minusplus(uminus(v465), v466); real2 v467 = minusplus(v465, v466); real2 v531 = minusplus(v529, v530); real2 v533 = minusplus(uminus(v529), v530); real2 v543 = ctimesminusplus(reverse(v533), ctbl[4], ctimes(v533, ctbl[2])); real2 v539 = ctimesminusplus(reverse(v531), ctbl[5], ctimes(v531, ctbl[3])); real2 v561 = reverse(minus(v534, v535)); real2 v567 = plus(v534, v535); real2 v479 = ctimesminusplus(reverse(v469), ctbl[10], ctimes(v469, ctbl[8])); real2 v719 = plus(v415, v479); real2 v713 = reverse(minus(v415, v479)); real2 v475 = ctimesminusplus(reverse(v467), ctbl[13], ctimes(v467, ctbl[7])); real2 v662 = plus(v411, v475); real2 v656 = reverse(minus(v411, v475)); real2 v196 = minusplus(v194, v195); real2 v198 = minusplus(uminus(v194), v195); real2 v209 = ctimesminusplus(reverse(v198), ctbl[25], ctimes(v198, ctbl[24])); real2 v961 = reverse(minus(v209, v351)); real2 v967 = plus(v209, v351); real2 v963 = minusplus(v961, v962); real2 v965 = minusplus(uminus(v961), v962); real2 v975 = ctimesminusplus(reverse(v965), ctbl[4], ctimes(v965, ctbl[2])); real2 v971 = ctimesminusplus(reverse(v963), ctbl[5], ctimes(v963, ctbl[3])); real2 v999 = plus(v966, v967); real2 v993 = reverse(minus(v966, v967)); real2 v31 = load(in, 29 << shift); real2 v63 = load(in, 61 << shift); real2 v305 = plus(v31, v63); real2 v299 = reverse(minus(v31, v63)); real2 v47 = load(in, 45 << shift); real2 v15 = load(in, 13 << shift); real2 v300 = minus(v47, v15); real2 v304 = plus(v15, v47); real2 v439 = 
plus(v304, v305); real2 v433 = reverse(minus(v304, v305)); real2 v301 = minusplus(v299, v300); real2 v303 = minusplus(uminus(v299), v300); real2 v315 = ctimesminusplus(reverse(v303), ctbl[16], ctimes(v303, ctbl[26])); real2 v7 = load(in, 5 << shift); real2 v39 = load(in, 37 << shift); real2 v164 = plus(v7, v39); real2 v160 = minus(v39, v7); real2 v23 = load(in, 21 << shift); real2 v55 = load(in, 53 << shift); real2 v159 = reverse(minus(v23, v55)); real2 v165 = plus(v23, v55); real2 v438 = plus(v164, v165); real2 v434 = minus(v165, v164); real2 v163 = minusplus(uminus(v159), v160); real2 v161 = minusplus(v159, v160); real2 v175 = ctimesminusplus(reverse(v163), ctbl[29], ctimes(v163, ctbl[15])); real2 v929 = reverse(minus(v175, v315)); real2 v935 = plus(v175, v315); real2 v435 = minusplus(v433, v434); real2 v437 = minusplus(uminus(v433), v434); real2 v443 = ctimesminusplus(reverse(v435), ctbl[9], ctimes(v435, ctbl[11])); real2 v497 = reverse(minus(v438, v439)); real2 v503 = plus(v438, v439); real2 v447 = ctimesminusplus(reverse(v437), ctbl[7], ctimes(v437, ctbl[6])); real2 v11 = load(in, 9 << shift); real2 v43 = load(in, 41 << shift); real2 v234 = plus(v11, v43); real2 v230 = minus(v43, v11); real2 v27 = load(in, 25 << shift); real2 v59 = load(in, 57 << shift); real2 v235 = plus(v27, v59); real2 v229 = reverse(minus(v27, v59)); real2 v375 = plus(v234, v235); real2 v369 = reverse(minus(v234, v235)); real2 v233 = minusplus(uminus(v229), v230); real2 v231 = minusplus(v229, v230); real2 v244 = ctimesminusplus(reverse(v233), ctbl[19], ctimes(v233, ctbl[18])); real2 v19 = load(in, 17 << shift); real2 v51 = load(in, 49 << shift); real2 v89 = plus(v19, v51); real2 v83 = reverse(minus(v19, v51)); real2 v3 = load(in, 1 << shift); real2 v35 = load(in, 33 << shift); real2 v88 = plus(v3, v35); real2 v84 = minus(v35, v3); real2 v370 = minus(v89, v88); real2 v374 = plus(v88, v89); real2 v371 = minusplus(v369, v370); real2 v373 = minusplus(uminus(v369), v370); real2 v383 = 
ctimesminusplus(reverse(v373), ctbl[11], ctimes(v373, ctbl[9])); real2 v714 = minus(v447, v383); real2 v718 = plus(v383, v447); real2 v502 = plus(v374, v375); real2 v498 = minus(v375, v374); real2 v379 = ctimesminusplus(reverse(v371), ctbl[7], ctimes(v371, ctbl[13])); real2 v657 = minus(v443, v379); real2 v661 = plus(v379, v443); real2 v715 = minusplus(v713, v714); real2 v717 = minusplus(uminus(v713), v714); real2 v566 = plus(v502, v503); real2 v562 = minus(v503, v502); real2 v499 = minusplus(v497, v498); real2 v501 = minusplus(uminus(v497), v498); real2 v511 = ctimesminusplus(reverse(v501), ctbl[5], ctimes(v501, ctbl[3])); real2 v621 = reverse(minus(v511, v543)); real2 v627 = plus(v511, v543); real2 v583 = plus(v566, v567); real2 v577 = reverse(minus(v566, v567)); real2 v727 = ctimesminusplus(reverse(v717), ctbl[1], ctimes(v717, ctbl[0])); real2 v723 = ctimesminusplus(reverse(v715), ctbl[1], ctimes(v715, ctbl[1])); real2 v507 = ctimesminusplus(reverse(v499), ctbl[3], ctimes(v499, ctbl[5])); real2 v735 = plus(v718, v719); real2 v729 = reverse(minus(v718, v719)); real2 v565 = minusplus(uminus(v561), v562); real2 v563 = minusplus(v561, v562); real2 v571 = ctimesminusplus(reverse(v563), ctbl[1], ctimes(v563, ctbl[1])); real2 v602 = reverse(minus(v507, v539)); real2 v608 = plus(v507, v539); real2 v660 = minusplus(uminus(v656), v657); real2 v658 = minusplus(v656, v657); real2 v670 = ctimesminusplus(reverse(v660), ctbl[1], ctimes(v660, ctbl[0])); real2 v666 = ctimesminusplus(reverse(v658), ctbl[1], ctimes(v658, ctbl[1])); real2 v678 = plus(v661, v662); real2 v672 = reverse(minus(v661, v662)); real2 v575 = ctimesminusplus(reverse(v565), ctbl[1], ctimes(v565, ctbl[0])); real2 v28 = load(in, 26 << shift); real2 v60 = load(in, 58 << shift); real2 v252 = plus(v28, v60); real2 v246 = reverse(minus(v28, v60)); real2 v44 = load(in, 42 << shift); real2 v12 = load(in, 10 << shift); real2 v251 = plus(v12, v44); real2 v247 = minus(v44, v12); real2 v391 = plus(v251, v252); real2 v385 
= reverse(minus(v251, v252)); real2 v20 = load(in, 18 << shift); real2 v52 = load(in, 50 << shift); real2 v109 = plus(v20, v52); real2 v103 = reverse(minus(v20, v52)); real2 v36 = load(in, 34 << shift); real2 v4 = load(in, 2 << shift); real2 v108 = plus(v4, v36); real2 v104 = minus(v36, v4); real2 v386 = minus(v109, v108); real2 v390 = plus(v108, v109); real2 v514 = minus(v391, v390); real2 v518 = plus(v390, v391); real2 v389 = minusplus(uminus(v385), v386); real2 v387 = minusplus(v385, v386); real2 v399 = ctimesminusplus(reverse(v389), ctbl[5], ctimes(v389, ctbl[3])); real2 v8 = load(in, 6 << shift); real2 v40 = load(in, 38 << shift); real2 v178 = minus(v40, v8); real2 v182 = plus(v8, v40); real2 v24 = load(in, 22 << shift); real2 v56 = load(in, 54 << shift); real2 v183 = plus(v24, v56); real2 v177 = reverse(minus(v24, v56)); real2 v450 = minus(v183, v182); real2 v454 = plus(v182, v183); real2 v16 = load(in, 14 << shift); real2 v48 = load(in, 46 << shift); real2 v322 = plus(v16, v48); real2 v318 = minus(v48, v16); real2 v32 = load(in, 30 << shift); real2 v64 = load(in, 62 << shift); real2 v323 = plus(v32, v64); real2 v317 = reverse(minus(v32, v64)); real2 v449 = reverse(minus(v322, v323)); real2 v455 = plus(v322, v323); real2 v519 = plus(v454, v455); real2 v513 = reverse(minus(v454, v455)); real2 v545 = reverse(minus(v518, v519)); real2 v551 = plus(v518, v519); real2 v515 = minusplus(v513, v514); real2 v517 = minusplus(uminus(v513), v514); real2 v527 = ctimesminusplus(reverse(v517), ctbl[1], ctimes(v517, ctbl[0])); real2 v523 = ctimesminusplus(reverse(v515), ctbl[1], ctimes(v515, ctbl[1])); real2 v14 = load(in, 12 << shift); real2 v46 = load(in, 44 << shift); real2 v286 = plus(v14, v46); real2 v282 = minus(v46, v14); real2 v62 = load(in, 60 << shift); real2 v30 = load(in, 28 << shift); real2 v281 = reverse(minus(v30, v62)); real2 v287 = plus(v30, v62); real2 v423 = plus(v286, v287); real2 v417 = reverse(minus(v286, v287)); real2 v22 = load(in, 20 << shift); real2 
v54 = load(in, 52 << shift); real2 v147 = plus(v22, v54); real2 v141 = reverse(minus(v22, v54)); real2 v38 = load(in, 36 << shift); real2 v6 = load(in, 4 << shift); real2 v146 = plus(v6, v38); real2 v142 = minus(v38, v6); real2 v422 = plus(v146, v147); real2 v418 = minus(v147, v146); real2 v487 = plus(v422, v423); real2 v481 = reverse(minus(v422, v423)); real2 v42 = load(in, 40 << shift); real2 v10 = load(in, 8 << shift); real2 v212 = minus(v42, v10); real2 v216 = plus(v10, v42); real2 v58 = load(in, 56 << shift); real2 v26 = load(in, 24 << shift); real2 v217 = plus(v26, v58); real2 v211 = reverse(minus(v26, v58)); real2 v353 = reverse(minus(v216, v217)); real2 v359 = plus(v216, v217); real2 v18 = load(in, 16 << shift); real2 v50 = load(in, 48 << shift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v18, v50)); real2 v2 = load(in, 0 << shift); real2 v34 = load(in, 32 << shift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v358 = plus(v72, v73); real2 v354 = minus(v73, v72); real2 v486 = plus(v358, v359); real2 v482 = minus(v359, v358); real2 v491 = minus(uplusminus(v481), v482); real2 v495 = minus(uminusplus(v481), v482); real2 v603 = minus(v523, v491); real2 v607 = plus(v491, v523); store(out, 4 << shift, plus(v607, v608)); store(out, 36 << shift, minus(v607, v608)); store(out, 52 << shift, minus(uminusplus(v602), v603)); store(out, 20 << shift, minus(uplusminus(v602), v603)); real2 v622 = minus(v527, v495); real2 v626 = plus(v495, v527); store(out, 60 << shift, minus(uminusplus(v621), v622)); store(out, 28 << shift, minus(uplusminus(v621), v622)); store(out, 12 << shift, plus(v626, v627)); store(out, 44 << shift, minus(v626, v627)); real2 v550 = plus(v486, v487); real2 v546 = minus(v487, v486); real2 v559 = minus(uminusplus(v545), v546); real2 v555 = minus(uplusminus(v545), v546); store(out, 8 << shift, plus(v555, v571)); store(out, 40 << shift, minus(v555, v571)); store(out, 24 << shift, plus(v559, v575)); store(out, 56 << shift, 
minus(v559, v575)); real2 v578 = minus(v551, v550); store(out, 48 << shift, minus(uminusplus(v577), v578)); store(out, 16 << shift, minus(uplusminus(v577), v578)); real2 v582 = plus(v550, v551); store(out, 0 << shift, plus(v582, v583)); store(out, 32 << shift, minus(v582, v583)); real2 v453 = minusplus(uminus(v449), v450); real2 v451 = minusplus(v449, v450); real2 v419 = minusplus(v417, v418); real2 v421 = minusplus(uminus(v417), v418); real2 v431 = ctimesminusplus(reverse(v421), ctbl[1], ctimes(v421, ctbl[0])); real2 v463 = ctimesminusplus(reverse(v453), ctbl[4], ctimes(v453, ctbl[2])); real2 v703 = plus(v399, v463); real2 v697 = reverse(minus(v399, v463)); real2 v367 = minus(uminusplus(v353), v354); real2 v363 = minus(uplusminus(v353), v354); real2 v702 = plus(v367, v431); real2 v698 = minus(v431, v367); real2 v730 = minus(v703, v702); store(out, 54 << shift, minus(uminusplus(v729), v730)); store(out, 22 << shift, minus(uplusminus(v729), v730)); real2 v734 = plus(v702, v703); store(out, 6 << shift, plus(v734, v735)); store(out, 38 << shift, minus(v734, v735)); real2 v707 = minus(uplusminus(v697), v698); real2 v711 = minus(uminusplus(v697), v698); store(out, 30 << shift, plus(v711, v727)); store(out, 62 << shift, minus(v711, v727)); store(out, 14 << shift, plus(v707, v723)); store(out, 46 << shift, minus(v707, v723)); real2 v395 = ctimesminusplus(reverse(v387), ctbl[3], ctimes(v387, ctbl[5])); real2 v459 = ctimesminusplus(reverse(v451), ctbl[5], ctimes(v451, ctbl[3])); real2 v640 = reverse(minus(v395, v459)); real2 v646 = plus(v395, v459); real2 v427 = ctimesminusplus(reverse(v419), ctbl[1], ctimes(v419, ctbl[1])); real2 v641 = minus(v427, v363); real2 v645 = plus(v363, v427); real2 v654 = minus(uminusplus(v640), v641); real2 v650 = minus(uplusminus(v640), v641); store(out, 10 << shift, plus(v650, v666)); store(out, 42 << shift, minus(v650, v666)); store(out, 58 << shift, minus(v654, v670)); store(out, 26 << shift, plus(v654, v670)); real2 v673 = minus(v646, 
v645); store(out, 50 << shift, minus(uminusplus(v672), v673)); store(out, 18 << shift, minus(uplusminus(v672), v673)); real2 v677 = plus(v645, v646); store(out, 2 << shift, plus(v677, v678)); store(out, 34 << shift, minus(v677, v678)); real2 v250 = minusplus(uminus(v246), v247); real2 v248 = minusplus(v246, v247); real2 v261 = ctimesminusplus(reverse(v250), ctbl[7], ctimes(v250, ctbl[6])); real2 v145 = minusplus(uminus(v141), v142); real2 v143 = minusplus(v141, v142); real2 v283 = minusplus(v281, v282); real2 v285 = minusplus(uminus(v281), v282); real2 v297 = ctimesminusplus(reverse(v285), ctbl[4], ctimes(v285, ctbl[2])); real2 v157 = ctimesminusplus(reverse(v145), ctbl[5], ctimes(v145, ctbl[3])); real2 v919 = plus(v157, v297); real2 v913 = reverse(minus(v157, v297)); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v227 = ctimesminusplus(reverse(v215), ctbl[1], ctimes(v215, ctbl[0])); real2 v81 = minus(uminusplus(v67), v68); real2 v77 = minus(uplusminus(v67), v68); real2 v85 = minusplus(v83, v84); real2 v87 = minusplus(uminus(v83), v84); real2 v101 = ctimesminusplus(reverse(v87), ctbl[23], ctimes(v87, ctbl[21])); real2 v934 = plus(v101, v244); real2 v930 = minus(v244, v101); real2 v179 = minusplus(v177, v178); real2 v181 = minusplus(uminus(v177), v178); real2 v192 = ctimesminusplus(reverse(v181), ctbl[13], ctimes(v181, ctbl[12])); real2 v918 = plus(v81, v227); real2 v914 = minus(v227, v81); real2 v105 = minusplus(v103, v104); real2 v107 = minusplus(uminus(v103), v104); real2 v121 = ctimesminusplus(reverse(v107), ctbl[11], ctimes(v107, ctbl[9])); real2 v946 = minus(v261, v121); real2 v950 = plus(v121, v261); real2 v994 = minus(v935, v934); real2 v998 = plus(v934, v935); real2 v1009 = reverse(minus(v998, v999)); real2 v1015 = plus(v998, v999); real2 v982 = plus(v918, v919); real2 v978 = minus(v919, v918); real2 v321 = minusplus(uminus(v317), v318); real2 v319 = minusplus(v317, v318); real2 v333 = ctimesminusplus(reverse(v321), 
ctbl[10], ctimes(v321, ctbl[8])); real2 v951 = plus(v192, v333); real2 v945 = reverse(minus(v192, v333)); real2 v983 = plus(v950, v951); real2 v977 = reverse(minus(v950, v951)); real2 v1014 = plus(v982, v983); real2 v1010 = minus(v983, v982); store(out, 3 << shift, plus(v1014, v1015)); store(out, 35 << shift, minus(v1014, v1015)); store(out, 51 << shift, minus(uminusplus(v1009), v1010)); store(out, 19 << shift, minus(uplusminus(v1009), v1010)); real2 v997 = minusplus(uminus(v993), v994); real2 v995 = minusplus(v993, v994); real2 v1003 = ctimesminusplus(reverse(v995), ctbl[1], ctimes(v995, ctbl[1])); real2 v987 = minus(uplusminus(v977), v978); store(out, 43 << shift, minus(v987, v1003)); store(out, 11 << shift, plus(v987, v1003)); real2 v991 = minus(uminusplus(v977), v978); real2 v1007 = ctimesminusplus(reverse(v997), ctbl[1], ctimes(v997, ctbl[0])); store(out, 27 << shift, plus(v991, v1007)); store(out, 59 << shift, minus(v991, v1007)); real2 v947 = minusplus(v945, v946); real2 v949 = minusplus(uminus(v945), v946); real2 v931 = minusplus(v929, v930); real2 v933 = minusplus(uminus(v929), v930); real2 v939 = ctimesminusplus(reverse(v931), ctbl[3], ctimes(v931, ctbl[5])); real2 v1034 = reverse(minus(v939, v971)); real2 v1040 = plus(v939, v971); real2 v927 = minus(uminusplus(v913), v914); real2 v923 = minus(uplusminus(v913), v914); real2 v955 = ctimesminusplus(reverse(v947), ctbl[1], ctimes(v947, ctbl[1])); real2 v1035 = minus(v955, v923); real2 v1039 = plus(v923, v955); store(out, 39 << shift, minus(v1039, v1040)); store(out, 7 << shift, plus(v1039, v1040)); store(out, 23 << shift, minus(uplusminus(v1034), v1035)); store(out, 55 << shift, minus(uminusplus(v1034), v1035)); real2 v959 = ctimesminusplus(reverse(v949), ctbl[1], ctimes(v949, ctbl[0])); real2 v943 = ctimesminusplus(reverse(v933), ctbl[5], ctimes(v933, ctbl[3])); real2 v1053 = reverse(minus(v943, v975)); real2 v1059 = plus(v943, v975); real2 v1058 = plus(v927, v959); real2 v1054 = minus(v959, v927); 
store(out, 63 << shift, minus(uminusplus(v1053), v1054)); store(out, 31 << shift, minus(uplusminus(v1053), v1054)); store(out, 47 << shift, minus(v1058, v1059)); store(out, 15 << shift, plus(v1058, v1059)); real2 v309 = ctimesminusplus(reverse(v301), ctbl[21], ctimes(v301, ctbl[23])); real2 v171 = ctimesminusplus(reverse(v161), ctbl[19], ctimes(v161, ctbl[25])); real2 v776 = plus(v171, v309); real2 v770 = reverse(minus(v171, v309)); real2 v256 = ctimesminusplus(reverse(v248), ctbl[9], ctimes(v248, ctbl[11])); real2 v222 = ctimesminusplus(reverse(v213), ctbl[1], ctimes(v213, ctbl[1])); real2 v239 = ctimesminusplus(reverse(v231), ctbl[17], ctimes(v231, ctbl[27])); real2 v204 = ctimesminusplus(reverse(v196), ctbl[27], ctimes(v196, ctbl[17])); real2 v291 = ctimesminusplus(reverse(v283), ctbl[5], ctimes(v283, ctbl[3])); real2 v153 = ctimesminusplus(reverse(v143), ctbl[3], ctimes(v143, ctbl[5])); real2 v760 = plus(v153, v291); real2 v754 = reverse(minus(v153, v291)); real2 v187 = ctimesminusplus(reverse(v179), ctbl[11], ctimes(v179, ctbl[9])); real2 v95 = ctimesminusplus(reverse(v85), ctbl[15], ctimes(v85, ctbl[29])); real2 v771 = minus(v239, v95); real2 v775 = plus(v95, v239); real2 v839 = plus(v775, v776); real2 v835 = minus(v776, v775); real2 v115 = ctimesminusplus(reverse(v105), ctbl[7], ctimes(v105, ctbl[13])); real2 v791 = plus(v115, v256); real2 v787 = minus(v256, v115); real2 v327 = ctimesminusplus(reverse(v319), ctbl[13], ctimes(v319, ctbl[7])); real2 v792 = plus(v187, v327); real2 v786 = reverse(minus(v187, v327)); real2 v824 = plus(v791, v792); real2 v818 = reverse(minus(v791, v792)); real2 v808 = plus(v204, v345); real2 v802 = reverse(minus(v204, v345)); real2 v840 = plus(v807, v808); real2 v834 = reverse(minus(v807, v808)); real2 v850 = reverse(minus(v839, v840)); real2 v856 = plus(v839, v840); real2 v759 = plus(v77, v222); real2 v755 = minus(v222, v77); real2 v823 = plus(v759, v760); real2 v819 = minus(v760, v759); real2 v855 = plus(v823, v824); store(out, 
33 << shift, minus(v855, v856)); store(out, 1 << shift, plus(v855, v856)); real2 v851 = minus(v824, v823); store(out, 49 << shift, minus(uminusplus(v850), v851)); store(out, 17 << shift, minus(uplusminus(v850), v851)); real2 v836 = minusplus(v834, v835); real2 v838 = minusplus(uminus(v834), v835); real2 v844 = ctimesminusplus(reverse(v836), ctbl[1], ctimes(v836, ctbl[1])); real2 v828 = minus(uplusminus(v818), v819); store(out, 41 << shift, minus(v828, v844)); store(out, 9 << shift, plus(v828, v844)); real2 v832 = minus(uminusplus(v818), v819); real2 v848 = ctimesminusplus(reverse(v838), ctbl[1], ctimes(v838, ctbl[0])); store(out, 25 << shift, plus(v832, v848)); store(out, 57 << shift, minus(v832, v848)); real2 v774 = minusplus(uminus(v770), v771); real2 v772 = minusplus(v770, v771); real2 v790 = minusplus(uminus(v786), v787); real2 v788 = minusplus(v786, v787); real2 v796 = ctimesminusplus(reverse(v788), ctbl[1], ctimes(v788, ctbl[1])); real2 v780 = ctimesminusplus(reverse(v772), ctbl[3], ctimes(v772, ctbl[5])); real2 v764 = minus(uplusminus(v754), v755); real2 v768 = minus(uminusplus(v754), v755); real2 v876 = minus(v796, v764); real2 v880 = plus(v764, v796); real2 v806 = minusplus(uminus(v802), v803); real2 v804 = minusplus(v802, v803); real2 v812 = ctimesminusplus(reverse(v804), ctbl[5], ctimes(v804, ctbl[3])); real2 v881 = plus(v780, v812); real2 v875 = reverse(minus(v780, v812)); store(out, 21 << shift, minus(uplusminus(v875), v876)); store(out, 53 << shift, minus(uminusplus(v875), v876)); store(out, 5 << shift, plus(v880, v881)); store(out, 37 << shift, minus(v880, v881)); real2 v800 = ctimesminusplus(reverse(v790), ctbl[1], ctimes(v790, ctbl[0])); real2 v784 = ctimesminusplus(reverse(v774), ctbl[5], ctimes(v774, ctbl[3])); real2 v816 = ctimesminusplus(reverse(v806), ctbl[4], ctimes(v806, ctbl[2])); real2 v900 = plus(v784, v816); real2 v894 = reverse(minus(v784, v816)); real2 v899 = plus(v768, v800); store(out, 45 << shift, minus(v899, v900)); store(out, 13 
<< shift, plus(v899, v900)); real2 v895 = minus(v800, v768); store(out, 61 << shift, minus(uminusplus(v894), v895)); store(out, 29 << shift, minus(uplusminus(v894), v895)); // Pres : 15312 } } ALIGNED(8192) void dft64b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; // Pres : 27598 real2 v27 = load(in, 25 << shift); real2 v59 = load(in, 57 << shift); real2 v241 = plus(v27, v59); real2 v235 = reverse(minus(v59, v27)); real2 v43 = load(in, 41 << shift); real2 v11 = load(in, 9 << shift); real2 v236 = minus(v43, v11); real2 v240 = plus(v11, v43); real2 v375 = plus(v240, v241); real2 v369 = reverse(minus(v241, v240)); real2 v237 = minusplus(v235, v236); real2 v239 = minusplus(uminus(v235), v236); real2 v249 = ctimesminusplus(reverse(v239), ctbl[24], ctimes(v239, ctbl[18])); real2 v245 = ctimesminusplus(reverse(v237), ctbl[26], ctimes(v237, ctbl[27])); real2 v3 = load(in, 1 << shift); real2 v35 = load(in, 33 << shift); real2 v84 = minus(v35, v3); real2 v88 = plus(v3, v35); real2 v51 = load(in, 49 << shift); real2 v19 = load(in, 17 << shift); real2 v83 = reverse(minus(v51, v19)); real2 v89 = plus(v19, v51); real2 v370 = minus(v89, v88); real2 v374 = plus(v88, v89); real2 v85 = minusplus(v83, v84); real2 v87 = minusplus(uminus(v83), v84); real2 v101 = ctimesminusplus(reverse(v87), ctbl[20], ctimes(v87, ctbl[21])); real2 v498 = minus(v375, v374); real2 v502 = plus(v374, v375); real2 v934 = plus(v101, v249); real2 v930 = minus(v249, v101); real2 v373 = minusplus(uminus(v369), v370); real2 v371 = minusplus(v369, v370); real2 v379 = ctimesminusplus(reverse(v371), ctbl[12], ctimes(v371, ctbl[13])); real2 v383 = ctimesminusplus(reverse(v373), ctbl[8], ctimes(v373, ctbl[9])); real2 v95 = ctimesminusplus(reverse(v85), ctbl[28], ctimes(v85, ctbl[29])); real2 v771 = 
minus(v245, v95); real2 v775 = plus(v95, v245); real2 v7 = load(in, 5 << shift); real2 v39 = load(in, 37 << shift); real2 v166 = plus(v7, v39); real2 v162 = minus(v39, v7); real2 v23 = load(in, 21 << shift); real2 v55 = load(in, 53 << shift); real2 v161 = reverse(minus(v55, v23)); real2 v167 = plus(v23, v55); real2 v163 = minusplus(v161, v162); real2 v165 = minusplus(uminus(v161), v162); real2 v434 = minus(v167, v166); real2 v438 = plus(v166, v167); real2 v179 = ctimesminusplus(reverse(v165), ctbl[14], ctimes(v165, ctbl[15])); real2 v173 = ctimesminusplus(reverse(v163), ctbl[24], ctimes(v163, ctbl[25])); real2 v15 = load(in, 13 << shift); real2 v47 = load(in, 45 << shift); real2 v307 = plus(v15, v47); real2 v303 = minus(v47, v15); real2 v63 = load(in, 61 << shift); real2 v31 = load(in, 29 << shift); real2 v308 = plus(v31, v63); real2 v302 = reverse(minus(v63, v31)); real2 v439 = plus(v307, v308); real2 v433 = reverse(minus(v308, v307)); real2 v437 = minusplus(uminus(v433), v434); real2 v435 = minusplus(v433, v434); real2 v443 = ctimesminusplus(reverse(v435), ctbl[10], ctimes(v435, ctbl[11])); real2 v497 = reverse(minus(v439, v438)); real2 v503 = plus(v438, v439); real2 v562 = minus(v503, v502); real2 v566 = plus(v502, v503); real2 v499 = minusplus(v497, v498); real2 v501 = minusplus(uminus(v497), v498); real2 v511 = ctimesminusplus(reverse(v501), ctbl[2], ctimes(v501, ctbl[3])); real2 v447 = ctimesminusplus(reverse(v437), ctbl[12], ctimes(v437, ctbl[6])); real2 v507 = ctimesminusplus(reverse(v499), ctbl[4], ctimes(v499, ctbl[5])); real2 v718 = plus(v383, v447); real2 v714 = minus(v447, v383); real2 v306 = minusplus(uminus(v302), v303); real2 v304 = minusplus(v302, v303); real2 v318 = ctimesminusplus(reverse(v306), ctbl[27], ctimes(v306, ctbl[26])); real2 v929 = reverse(minus(v318, v179)); real2 v935 = plus(v179, v318); real2 v931 = minusplus(v929, v930); real2 v933 = minusplus(uminus(v929), v930); real2 v998 = plus(v934, v935); real2 v994 = minus(v935, v934); real2 
v661 = plus(v379, v443); real2 v657 = minus(v443, v379); real2 v939 = ctimesminusplus(reverse(v931), ctbl[4], ctimes(v931, ctbl[5])); real2 v943 = ctimesminusplus(reverse(v933), ctbl[2], ctimes(v933, ctbl[3])); real2 v45 = load(in, 43 << shift); real2 v13 = load(in, 11 << shift); real2 v274 = plus(v13, v45); real2 v270 = minus(v45, v13); real2 v29 = load(in, 27 << shift); real2 v61 = load(in, 59 << shift); real2 v269 = reverse(minus(v61, v29)); real2 v275 = plus(v29, v61); real2 v273 = minusplus(uminus(v269), v270); real2 v271 = minusplus(v269, v270); real2 v407 = plus(v274, v275); real2 v401 = reverse(minus(v275, v274)); real2 v284 = ctimesminusplus(reverse(v273), ctbl[15], ctimes(v273, ctbl[14])); real2 v5 = load(in, 3 << shift); real2 v37 = load(in, 35 << shift); real2 v128 = plus(v5, v37); real2 v124 = minus(v37, v5); real2 v53 = load(in, 51 << shift); real2 v21 = load(in, 19 << shift); real2 v129 = plus(v21, v53); real2 v123 = reverse(minus(v53, v21)); real2 v406 = plus(v128, v129); real2 v402 = minus(v129, v128); real2 v405 = minusplus(uminus(v401), v402); real2 v403 = minusplus(v401, v402); real2 v415 = ctimesminusplus(reverse(v405), ctbl[6], ctimes(v405, ctbl[12])); real2 v411 = ctimesminusplus(reverse(v403), ctbl[8], ctimes(v403, ctbl[9])); real2 v127 = minusplus(uminus(v123), v124); real2 v125 = minusplus(v123, v124); real2 v530 = minus(v407, v406); real2 v534 = plus(v406, v407); real2 v139 = ctimesminusplus(reverse(v127), ctbl[26], ctimes(v127, ctbl[27])); real2 v962 = minus(v284, v139); real2 v966 = plus(v139, v284); real2 v57 = load(in, 55 << shift); real2 v25 = load(in, 23 << shift); real2 v204 = plus(v25, v57); real2 v198 = reverse(minus(v57, v25)); real2 v9 = load(in, 7 << shift); real2 v41 = load(in, 39 << shift); real2 v199 = minus(v41, v9); real2 v203 = plus(v9, v41); real2 v202 = minusplus(uminus(v198), v199); real2 v200 = minusplus(v198, v199); real2 v470 = plus(v203, v204); real2 v466 = minus(v204, v203); real2 v215 = 
ctimesminusplus(reverse(v202), ctbl[18], ctimes(v202, ctbl[24])); real2 v17 = load(in, 15 << shift); real2 v49 = load(in, 47 << shift); real2 v338 = minus(v49, v17); real2 v342 = plus(v17, v49); real2 v33 = load(in, 31 << shift); real2 v65 = load(in, 63 << shift); real2 v337 = reverse(minus(v65, v33)); real2 v343 = plus(v33, v65); real2 v341 = minusplus(uminus(v337), v338); real2 v339 = minusplus(v337, v338); real2 v351 = ctimesminusplus(reverse(v341), ctbl[21], ctimes(v341, ctbl[20])); real2 v961 = reverse(minus(v351, v215)); real2 v967 = plus(v215, v351); real2 v465 = reverse(minus(v343, v342)); real2 v471 = plus(v342, v343); real2 v467 = minusplus(v465, v466); real2 v469 = minusplus(uminus(v465), v466); real2 v475 = ctimesminusplus(reverse(v467), ctbl[6], ctimes(v467, ctbl[7])); real2 v662 = plus(v411, v475); real2 v656 = reverse(minus(v475, v411)); real2 v529 = reverse(minus(v471, v470)); real2 v535 = plus(v470, v471); real2 v531 = minusplus(v529, v530); real2 v533 = minusplus(uminus(v529), v530); real2 v543 = ctimesminusplus(reverse(v533), ctbl[3], ctimes(v533, ctbl[2])); real2 v561 = reverse(minus(v535, v534)); real2 v567 = plus(v534, v535); real2 v565 = minusplus(uminus(v561), v562); real2 v563 = minusplus(v561, v562); real2 v571 = ctimesminusplus(reverse(v563), ctbl[0], ctimes(v563, ctbl[1])); real2 v583 = plus(v566, v567); real2 v577 = reverse(minus(v567, v566)); real2 v539 = ctimesminusplus(reverse(v531), ctbl[2], ctimes(v531, ctbl[3])); real2 v602 = reverse(minus(v539, v507)); real2 v608 = plus(v507, v539); real2 v993 = reverse(minus(v967, v966)); real2 v999 = plus(v966, v967); real2 v575 = ctimesminusplus(reverse(v565), ctbl[0], ctimes(v565, ctbl[0])); real2 v1009 = reverse(minus(v999, v998)); real2 v1015 = plus(v998, v999); real2 v479 = ctimesminusplus(reverse(v469), ctbl[9], ctimes(v469, ctbl[8])); real2 v713 = reverse(minus(v479, v415)); real2 v719 = plus(v415, v479); real2 v660 = minusplus(uminus(v656), v657); real2 v658 = minusplus(v656, v657); 
real2 v717 = minusplus(uminus(v713), v714); real2 v715 = minusplus(v713, v714); real2 v723 = ctimesminusplus(reverse(v715), ctbl[0], ctimes(v715, ctbl[1])); real2 v666 = ctimesminusplus(reverse(v658), ctbl[0], ctimes(v658, ctbl[1])); real2 v670 = ctimesminusplus(reverse(v660), ctbl[0], ctimes(v660, ctbl[0])); real2 v727 = ctimesminusplus(reverse(v717), ctbl[0], ctimes(v717, ctbl[0])); real2 v735 = plus(v718, v719); real2 v729 = reverse(minus(v719, v718)); real2 v621 = reverse(minus(v543, v511)); real2 v627 = plus(v511, v543); real2 v672 = reverse(minus(v662, v661)); real2 v678 = plus(v661, v662); real2 v28 = load(in, 26 << shift); real2 v60 = load(in, 58 << shift); real2 v251 = reverse(minus(v60, v28)); real2 v257 = plus(v28, v60); real2 v44 = load(in, 42 << shift); real2 v12 = load(in, 10 << shift); real2 v256 = plus(v12, v44); real2 v252 = minus(v44, v12); real2 v391 = plus(v256, v257); real2 v385 = reverse(minus(v257, v256)); real2 v36 = load(in, 34 << shift); real2 v4 = load(in, 2 << shift); real2 v104 = minus(v36, v4); real2 v108 = plus(v4, v36); real2 v20 = load(in, 18 << shift); real2 v52 = load(in, 50 << shift); real2 v109 = plus(v20, v52); real2 v103 = reverse(minus(v52, v20)); real2 v390 = plus(v108, v109); real2 v386 = minus(v109, v108); real2 v514 = minus(v391, v390); real2 v518 = plus(v390, v391); real2 v387 = minusplus(v385, v386); real2 v389 = minusplus(uminus(v385), v386); real2 v399 = ctimesminusplus(reverse(v389), ctbl[2], ctimes(v389, ctbl[3])); real2 v395 = ctimesminusplus(reverse(v387), ctbl[4], ctimes(v387, ctbl[5])); real2 v40 = load(in, 38 << shift); real2 v8 = load(in, 6 << shift); real2 v182 = minus(v40, v8); real2 v186 = plus(v8, v40); real2 v56 = load(in, 54 << shift); real2 v24 = load(in, 22 << shift); real2 v187 = plus(v24, v56); real2 v181 = reverse(minus(v56, v24)); real2 v454 = plus(v186, v187); real2 v450 = minus(v187, v186); real2 v48 = load(in, 46 << shift); real2 v16 = load(in, 14 << shift); real2 v325 = plus(v16, v48); real2 
v321 = minus(v48, v16); real2 v32 = load(in, 30 << shift); real2 v64 = load(in, 62 << shift); real2 v326 = plus(v32, v64); real2 v320 = reverse(minus(v64, v32)); real2 v455 = plus(v325, v326); real2 v449 = reverse(minus(v326, v325)); real2 v513 = reverse(minus(v455, v454)); real2 v519 = plus(v454, v455); real2 v515 = minusplus(v513, v514); real2 v517 = minusplus(uminus(v513), v514); real2 v551 = plus(v518, v519); real2 v545 = reverse(minus(v519, v518)); real2 v523 = ctimesminusplus(reverse(v515), ctbl[0], ctimes(v515, ctbl[1])); real2 v527 = ctimesminusplus(reverse(v517), ctbl[0], ctimes(v517, ctbl[0])); real2 v58 = load(in, 56 << shift); real2 v26 = load(in, 24 << shift); real2 v217 = reverse(minus(v58, v26)); real2 v223 = plus(v26, v58); real2 v50 = load(in, 48 << shift); real2 v18 = load(in, 16 << shift); real2 v67 = reverse(minus(v50, v18)); real2 v73 = plus(v18, v50); real2 v2 = load(in, 0 << shift); real2 v34 = load(in, 32 << shift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v354 = minus(v73, v72); real2 v358 = plus(v72, v73); real2 v10 = load(in, 8 << shift); real2 v42 = load(in, 40 << shift); real2 v222 = plus(v10, v42); real2 v218 = minus(v42, v10); real2 v359 = plus(v222, v223); real2 v353 = reverse(minus(v223, v222)); real2 v482 = minus(v359, v358); real2 v486 = plus(v358, v359); real2 v46 = load(in, 44 << shift); real2 v14 = load(in, 12 << shift); real2 v287 = minus(v46, v14); real2 v291 = plus(v14, v46); real2 v54 = load(in, 52 << shift); real2 v22 = load(in, 20 << shift); real2 v147 = plus(v22, v54); real2 v141 = reverse(minus(v54, v22)); real2 v38 = load(in, 36 << shift); real2 v6 = load(in, 4 << shift); real2 v142 = minus(v38, v6); real2 v146 = plus(v6, v38); real2 v418 = minus(v147, v146); real2 v422 = plus(v146, v147); real2 v62 = load(in, 60 << shift); real2 v30 = load(in, 28 << shift); real2 v286 = reverse(minus(v62, v30)); real2 v292 = plus(v30, v62); real2 v423 = plus(v291, v292); real2 v417 = reverse(minus(v292, v291)); 
real2 v481 = reverse(minus(v423, v422)); real2 v487 = plus(v422, v423); real2 v550 = plus(v486, v487); real2 v546 = minus(v487, v486); real2 v578 = minus(v551, v550); store(out, 16 << shift, minus(uplusminus(v577), v578)); store(out, 48 << shift, minus(uminusplus(v577), v578)); real2 v582 = plus(v550, v551); store(out, 0 << shift, plus(v582, v583)); store(out, 32 << shift, minus(v582, v583)); real2 v559 = minus(uminusplus(v545), v546); real2 v555 = minus(uplusminus(v545), v546); store(out, 40 << shift, minus(v555, v571)); store(out, 8 << shift, plus(v555, v571)); store(out, 56 << shift, minus(v559, v575)); store(out, 24 << shift, plus(v559, v575)); real2 v495 = minus(uminusplus(v481), v482); real2 v491 = minus(uplusminus(v481), v482); real2 v626 = plus(v495, v527); store(out, 12 << shift, plus(v626, v627)); store(out, 44 << shift, minus(v626, v627)); real2 v622 = minus(v527, v495); store(out, 60 << shift, minus(uminusplus(v621), v622)); store(out, 28 << shift, minus(uplusminus(v621), v622)); real2 v607 = plus(v491, v523); real2 v603 = minus(v523, v491); store(out, 4 << shift, plus(v607, v608)); store(out, 36 << shift, minus(v607, v608)); store(out, 20 << shift, minus(uplusminus(v602), v603)); store(out, 52 << shift, minus(uminusplus(v602), v603)); real2 v367 = minus(uminusplus(v353), v354); real2 v363 = minus(uplusminus(v353), v354); real2 v421 = minusplus(uminus(v417), v418); real2 v419 = minusplus(v417, v418); real2 v431 = ctimesminusplus(reverse(v421), ctbl[0], ctimes(v421, ctbl[0])); real2 v451 = minusplus(v449, v450); real2 v453 = minusplus(uminus(v449), v450); real2 v463 = ctimesminusplus(reverse(v453), ctbl[3], ctimes(v453, ctbl[2])); real2 v697 = reverse(minus(v463, v399)); real2 v703 = plus(v399, v463); real2 v698 = minus(v431, v367); real2 v702 = plus(v367, v431); real2 v711 = minus(uminusplus(v697), v698); store(out, 30 << shift, plus(v711, v727)); store(out, 62 << shift, minus(v711, v727)); real2 v707 = minus(uplusminus(v697), v698); store(out, 46 << 
shift, minus(v707, v723)); store(out, 14 << shift, plus(v707, v723)); real2 v734 = plus(v702, v703); store(out, 6 << shift, plus(v734, v735)); store(out, 38 << shift, minus(v734, v735)); real2 v730 = minus(v703, v702); store(out, 54 << shift, minus(uminusplus(v729), v730)); store(out, 22 << shift, minus(uplusminus(v729), v730)); real2 v459 = ctimesminusplus(reverse(v451), ctbl[2], ctimes(v451, ctbl[3])); real2 v640 = reverse(minus(v459, v395)); real2 v646 = plus(v395, v459); real2 v427 = ctimesminusplus(reverse(v419), ctbl[0], ctimes(v419, ctbl[1])); real2 v641 = minus(v427, v363); real2 v645 = plus(v363, v427); real2 v654 = minus(uminusplus(v640), v641); real2 v650 = minus(uplusminus(v640), v641); store(out, 10 << shift, plus(v650, v666)); store(out, 42 << shift, minus(v650, v666)); store(out, 26 << shift, plus(v654, v670)); store(out, 58 << shift, minus(v654, v670)); real2 v673 = minus(v646, v645); store(out, 18 << shift, minus(uplusminus(v672), v673)); store(out, 50 << shift, minus(uminusplus(v672), v673)); real2 v677 = plus(v645, v646); store(out, 2 << shift, plus(v677, v678)); store(out, 34 << shift, minus(v677, v678)); real2 v105 = minusplus(v103, v104); real2 v107 = minusplus(uminus(v103), v104); real2 v253 = minusplus(v251, v252); real2 v255 = minusplus(uminus(v251), v252); real2 v267 = ctimesminusplus(reverse(v255), ctbl[12], ctimes(v255, ctbl[6])); real2 v121 = ctimesminusplus(reverse(v107), ctbl[8], ctimes(v107, ctbl[9])); real2 v950 = plus(v121, v267); real2 v946 = minus(v267, v121); real2 v290 = minusplus(uminus(v286), v287); real2 v288 = minusplus(v286, v287); real2 v143 = minusplus(v141, v142); real2 v145 = minusplus(uminus(v141), v142); real2 v159 = ctimesminusplus(reverse(v145), ctbl[2], ctimes(v145, ctbl[3])); real2 v300 = ctimesminusplus(reverse(v290), ctbl[3], ctimes(v290, ctbl[2])); real2 v919 = plus(v159, v300); real2 v913 = reverse(minus(v300, v159)); real2 v219 = minusplus(v217, v218); real2 v221 = minusplus(uminus(v217), v218); real2 v185 = 
minusplus(uminus(v181), v182); real2 v183 = minusplus(v181, v182); real2 v196 = ctimesminusplus(reverse(v185), ctbl[6], ctimes(v185, ctbl[12])); real2 v233 = ctimesminusplus(reverse(v221), ctbl[0], ctimes(v221, ctbl[0])); real2 v324 = minusplus(uminus(v320), v321); real2 v322 = minusplus(v320, v321); real2 v335 = ctimesminusplus(reverse(v324), ctbl[9], ctimes(v324, ctbl[8])); real2 v945 = reverse(minus(v335, v196)); real2 v951 = plus(v196, v335); real2 v977 = reverse(minus(v951, v950)); real2 v983 = plus(v950, v951); real2 v77 = minus(uplusminus(v67), v68); real2 v81 = minus(uminusplus(v67), v68); real2 v914 = minus(v233, v81); real2 v918 = plus(v81, v233); real2 v982 = plus(v918, v919); real2 v978 = minus(v919, v918); real2 v1014 = plus(v982, v983); store(out, 3 << shift, plus(v1014, v1015)); store(out, 35 << shift, minus(v1014, v1015)); real2 v1010 = minus(v983, v982); store(out, 19 << shift, minus(uplusminus(v1009), v1010)); store(out, 51 << shift, minus(uminusplus(v1009), v1010)); real2 v995 = minusplus(v993, v994); real2 v997 = minusplus(uminus(v993), v994); real2 v1007 = ctimesminusplus(reverse(v997), ctbl[0], ctimes(v997, ctbl[0])); real2 v987 = minus(uplusminus(v977), v978); real2 v991 = minus(uminusplus(v977), v978); store(out, 27 << shift, plus(v991, v1007)); store(out, 59 << shift, minus(v991, v1007)); real2 v1003 = ctimesminusplus(reverse(v995), ctbl[0], ctimes(v995, ctbl[1])); store(out, 43 << shift, minus(v987, v1003)); store(out, 11 << shift, plus(v987, v1003)); real2 v965 = minusplus(uminus(v961), v962); real2 v963 = minusplus(v961, v962); real2 v975 = ctimesminusplus(reverse(v965), ctbl[3], ctimes(v965, ctbl[2])); real2 v1059 = plus(v943, v975); real2 v1053 = reverse(minus(v975, v943)); real2 v947 = minusplus(v945, v946); real2 v949 = minusplus(uminus(v945), v946); real2 v959 = ctimesminusplus(reverse(v949), ctbl[0], ctimes(v949, ctbl[0])); real2 v927 = minus(uminusplus(v913), v914); real2 v923 = minus(uplusminus(v913), v914); real2 v1058 = 
plus(v927, v959); store(out, 15 << shift, plus(v1058, v1059)); store(out, 47 << shift, minus(v1058, v1059)); real2 v1054 = minus(v959, v927); store(out, 63 << shift, minus(uminusplus(v1053), v1054)); store(out, 31 << shift, minus(uplusminus(v1053), v1054)); real2 v955 = ctimesminusplus(reverse(v947), ctbl[0], ctimes(v947, ctbl[1])); real2 v971 = ctimesminusplus(reverse(v963), ctbl[2], ctimes(v963, ctbl[3])); real2 v1034 = reverse(minus(v971, v939)); real2 v1040 = plus(v939, v971); real2 v1035 = minus(v955, v923); store(out, 55 << shift, minus(uminusplus(v1034), v1035)); store(out, 23 << shift, minus(uplusminus(v1034), v1035)); real2 v1039 = plus(v923, v955); store(out, 39 << shift, minus(v1039, v1040)); store(out, 7 << shift, plus(v1039, v1040)); real2 v263 = ctimesminusplus(reverse(v253), ctbl[10], ctimes(v253, ctbl[11])); real2 v191 = ctimesminusplus(reverse(v183), ctbl[8], ctimes(v183, ctbl[9])); real2 v115 = ctimesminusplus(reverse(v105), ctbl[12], ctimes(v105, ctbl[13])); real2 v787 = minus(v263, v115); real2 v791 = plus(v115, v263); real2 v331 = ctimesminusplus(reverse(v322), ctbl[6], ctimes(v322, ctbl[7])); real2 v786 = reverse(minus(v331, v191)); real2 v792 = plus(v191, v331); real2 v280 = ctimesminusplus(reverse(v271), ctbl[18], ctimes(v271, ctbl[19])); real2 v133 = ctimesminusplus(reverse(v125), ctbl[20], ctimes(v125, ctbl[21])); real2 v807 = plus(v133, v280); real2 v803 = minus(v280, v133); real2 v210 = ctimesminusplus(reverse(v200), ctbl[16], ctimes(v200, ctbl[17])); real2 v153 = ctimesminusplus(reverse(v143), ctbl[4], ctimes(v143, ctbl[5])); real2 v347 = ctimesminusplus(reverse(v339), ctbl[14], ctimes(v339, ctbl[15])); real2 v808 = plus(v210, v347); real2 v802 = reverse(minus(v347, v210)); real2 v314 = ctimesminusplus(reverse(v304), ctbl[22], ctimes(v304, ctbl[23])); real2 v770 = reverse(minus(v314, v173)); real2 v776 = plus(v173, v314); real2 v839 = plus(v775, v776); real2 v835 = minus(v776, v775); real2 v818 = reverse(minus(v792, v791)); real2 v824 = 
plus(v791, v792); real2 v840 = plus(v807, v808); real2 v834 = reverse(minus(v808, v807)); real2 v856 = plus(v839, v840); real2 v850 = reverse(minus(v840, v839)); real2 v229 = ctimesminusplus(reverse(v219), ctbl[0], ctimes(v219, ctbl[1])); real2 v296 = ctimesminusplus(reverse(v288), ctbl[2], ctimes(v288, ctbl[3])); real2 v760 = plus(v153, v296); real2 v754 = reverse(minus(v296, v153)); real2 v759 = plus(v77, v229); real2 v755 = minus(v229, v77); real2 v823 = plus(v759, v760); real2 v819 = minus(v760, v759); real2 v855 = plus(v823, v824); store(out, 33 << shift, minus(v855, v856)); store(out, 1 << shift, plus(v855, v856)); real2 v851 = minus(v824, v823); store(out, 17 << shift, minus(uplusminus(v850), v851)); store(out, 49 << shift, minus(uminusplus(v850), v851)); real2 v838 = minusplus(uminus(v834), v835); real2 v836 = minusplus(v834, v835); real2 v844 = ctimesminusplus(reverse(v836), ctbl[0], ctimes(v836, ctbl[1])); real2 v832 = minus(uminusplus(v818), v819); real2 v828 = minus(uplusminus(v818), v819); store(out, 41 << shift, minus(v828, v844)); store(out, 9 << shift, plus(v828, v844)); real2 v848 = ctimesminusplus(reverse(v838), ctbl[0], ctimes(v838, ctbl[0])); store(out, 25 << shift, plus(v832, v848)); store(out, 57 << shift, minus(v832, v848)); real2 v790 = minusplus(uminus(v786), v787); real2 v788 = minusplus(v786, v787); real2 v772 = minusplus(v770, v771); real2 v774 = minusplus(uminus(v770), v771); real2 v780 = ctimesminusplus(reverse(v772), ctbl[4], ctimes(v772, ctbl[5])); real2 v806 = minusplus(uminus(v802), v803); real2 v804 = minusplus(v802, v803); real2 v812 = ctimesminusplus(reverse(v804), ctbl[2], ctimes(v804, ctbl[3])); real2 v875 = reverse(minus(v812, v780)); real2 v881 = plus(v780, v812); real2 v796 = ctimesminusplus(reverse(v788), ctbl[0], ctimes(v788, ctbl[1])); real2 v764 = minus(uplusminus(v754), v755); real2 v768 = minus(uminusplus(v754), v755); real2 v880 = plus(v764, v796); real2 v876 = minus(v796, v764); store(out, 21 << shift, 
minus(uplusminus(v875), v876)); store(out, 53 << shift, minus(uminusplus(v875), v876)); store(out, 37 << shift, minus(v880, v881)); store(out, 5 << shift, plus(v880, v881)); real2 v800 = ctimesminusplus(reverse(v790), ctbl[0], ctimes(v790, ctbl[0])); real2 v784 = ctimesminusplus(reverse(v774), ctbl[2], ctimes(v774, ctbl[3])); real2 v816 = ctimesminusplus(reverse(v806), ctbl[3], ctimes(v806, ctbl[2])); real2 v900 = plus(v784, v816); real2 v894 = reverse(minus(v816, v784)); real2 v895 = minus(v800, v768); real2 v899 = plus(v768, v800); store(out, 45 << shift, minus(v899, v900)); store(out, 13 << shift, plus(v899, v900)); store(out, 61 << shift, minus(uminusplus(v894), v895)); store(out, 29 << shift, minus(uplusminus(v894), v895)); // Pres : 15320 } } /* but64f_%CONFIG%_%ISA%: fully unrolled kernel that reads 64 real2 inputs and writes 64 real2 outputs per loop iteration (presumably a radix-64 forward butterfly, judging by the name - TODO confirm; %CONFIG% and %ISA% look like template placeholders substituted elsewhere). The loop runs k = 1 << (inShift - LOG2VECWIDTH) iterations under "#pragma omp parallel for". Iteration i uses i0 = i << LOG2VECWIDTH, loads from in0 + i0*2 at offsets n << inShift (n = 0..63), combines the values with coefficients tbl[j + tbloffset] (j ranges up to 229, tbloffset = K * (i0 >> outShift)), and stores to out0 + q[i] at offsets n << outShift (n = 0..63). load, store, plus, minus, reverse, uminus, minusplus, ctimes and ctimesminusplus are helpers defined outside this function. */ ALIGNED(8192) void but64f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); // Pres : 30254 real2 v37 = load(in, 35 << inShift); real2 v5 = load(in, 3 << inShift); real2 v132 = plus(v5, v37); real2 v128 = minus(v37, v5); real2 v21 = load(in, 19 << inShift); real2 v53 = load(in, 51 << inShift); real2 v133 = plus(v21, v53); real2 v127 = reverse(minus(v21, v53)); real2 v131 = minusplus(uminus(v127), v128); real2 v129 = minusplus(v127, v128); real2 v139 = ctimesminusplus(reverse(v129), tbl[14 + tbloffset], ctimes(v129, tbl[15 + tbloffset])); real2 v145 = ctimesminusplus(reverse(v131), tbl[16 + tbloffset], ctimes(v131, tbl[17 + tbloffset])); real2 v448 = minus(v133, v132); real2 v452 = plus(v132, v133); real2 v45 = load(in, 43 << inShift); real2 v13 = load(in, 11 << inShift); real2 v292 = plus(v13, v45); real2 v288 = minus(v45, v13); real2 v29 = load(in, 27 << inShift); real2 v61 = load(in, 59 << 
inShift); real2 v293 = plus(v29, v61); real2 v287 = reverse(minus(v29, v61)); real2 v291 = minusplus(uminus(v287), v288); real2 v289 = minusplus(v287, v288); real2 v299 = ctimesminusplus(reverse(v289), tbl[46 + tbloffset], ctimes(v289, tbl[47 + tbloffset])); real2 v453 = plus(v292, v293); real2 v447 = reverse(minus(v292, v293)); real2 v608 = minus(v453, v452); real2 v612 = plus(v452, v453); real2 v980 = plus(v139, v299); real2 v976 = minus(v299, v139); real2 v449 = minusplus(v447, v448); real2 v451 = minusplus(uminus(v447), v448); real2 v465 = ctimesminusplus(reverse(v451), tbl[80 + tbloffset], ctimes(v451, tbl[81 + tbloffset])); real2 v305 = ctimesminusplus(reverse(v291), tbl[48 + tbloffset], ctimes(v291, tbl[49 + tbloffset])); real2 v1186 = minus(v305, v145); real2 v1190 = plus(v145, v305); real2 v459 = ctimesminusplus(reverse(v449), tbl[78 + tbloffset], ctimes(v449, tbl[79 + tbloffset])); real2 v25 = load(in, 23 << inShift); real2 v57 = load(in, 55 << inShift); real2 v207 = reverse(minus(v25, v57)); real2 v213 = plus(v25, v57); real2 v9 = load(in, 7 << inShift); real2 v41 = load(in, 39 << inShift); real2 v212 = plus(v9, v41); real2 v208 = minus(v41, v9); real2 v528 = minus(v213, v212); real2 v532 = plus(v212, v213); real2 v209 = minusplus(v207, v208); real2 v211 = minusplus(uminus(v207), v208); real2 v225 = ctimesminusplus(reverse(v211), tbl[32 + tbloffset], ctimes(v211, tbl[33 + tbloffset])); real2 v219 = ctimesminusplus(reverse(v209), tbl[30 + tbloffset], ctimes(v209, tbl[31 + tbloffset])); real2 v17 = load(in, 15 << inShift); real2 v49 = load(in, 47 << inShift); real2 v368 = minus(v49, v17); real2 v372 = plus(v17, v49); real2 v33 = load(in, 31 << inShift); real2 v65 = load(in, 63 << inShift); real2 v367 = reverse(minus(v33, v65)); real2 v373 = plus(v33, v65); real2 v369 = minusplus(v367, v368); real2 v371 = minusplus(uminus(v367), v368); real2 v533 = plus(v372, v373); real2 v527 = reverse(minus(v372, v373)); real2 v607 = reverse(minus(v532, v533)); real2 v613 
= plus(v532, v533); real2 v529 = minusplus(v527, v528); real2 v531 = minusplus(uminus(v527), v528); real2 v545 = ctimesminusplus(reverse(v531), tbl[96 + tbloffset], ctimes(v531, tbl[97 + tbloffset])); real2 v653 = plus(v612, v613); real2 v647 = reverse(minus(v612, v613)); real2 v609 = minusplus(v607, v608); real2 v611 = minusplus(uminus(v607), v608); real2 v863 = plus(v465, v545); real2 v857 = reverse(minus(v465, v545)); real2 v539 = ctimesminusplus(reverse(v529), tbl[94 + tbloffset], ctimes(v529, tbl[95 + tbloffset])); real2 v385 = ctimesminusplus(reverse(v371), tbl[64 + tbloffset], ctimes(v371, tbl[65 + tbloffset])); real2 v619 = ctimesminusplus(reverse(v609), tbl[110 + tbloffset], ctimes(v609, tbl[111 + tbloffset])); real2 v1191 = plus(v225, v385); real2 v1185 = reverse(minus(v225, v385)); real2 v779 = reverse(minus(v459, v539)); real2 v785 = plus(v459, v539); real2 v625 = ctimesminusplus(reverse(v611), tbl[112 + tbloffset], ctimes(v611, tbl[113 + tbloffset])); real2 v379 = ctimesminusplus(reverse(v369), tbl[62 + tbloffset], ctimes(v369, tbl[63 + tbloffset])); real2 v975 = reverse(minus(v219, v379)); real2 v981 = plus(v219, v379); real2 v977 = minusplus(v975, v976); real2 v979 = minusplus(uminus(v975), v976); real2 v987 = ctimesminusplus(reverse(v977), tbl[170 + tbloffset], ctimes(v977, tbl[171 + tbloffset])); real2 v993 = ctimesminusplus(reverse(v979), tbl[172 + tbloffset], ctimes(v979, tbl[173 + tbloffset])); real2 v1015 = reverse(minus(v980, v981)); real2 v1021 = plus(v980, v981); real2 v11 = load(in, 9 << inShift); real2 v43 = load(in, 41 << inShift); real2 v248 = minus(v43, v11); real2 v252 = plus(v11, v43); real2 v59 = load(in, 57 << inShift); real2 v27 = load(in, 25 << inShift); real2 v253 = plus(v27, v59); real2 v247 = reverse(minus(v27, v59)); real2 v413 = plus(v252, v253); real2 v407 = reverse(minus(v252, v253)); real2 v249 = minusplus(v247, v248); real2 v251 = minusplus(uminus(v247), v248); real2 v259 = ctimesminusplus(reverse(v249), tbl[38 + 
tbloffset], ctimes(v249, tbl[39 + tbloffset])); real2 v35 = load(in, 33 << inShift); real2 v3 = load(in, 1 << inShift); real2 v92 = plus(v3, v35); real2 v88 = minus(v35, v3); real2 v51 = load(in, 49 << inShift); real2 v19 = load(in, 17 << inShift); real2 v87 = reverse(minus(v19, v51)); real2 v93 = plus(v19, v51); real2 v412 = plus(v92, v93); real2 v408 = minus(v93, v92); real2 v411 = minusplus(uminus(v407), v408); real2 v409 = minusplus(v407, v408); real2 v91 = minusplus(uminus(v87), v88); real2 v89 = minusplus(v87, v88); real2 v99 = ctimesminusplus(reverse(v89), tbl[6 + tbloffset], ctimes(v89, tbl[7 + tbloffset])); real2 v425 = ctimesminusplus(reverse(v411), tbl[72 + tbloffset], ctimes(v411, tbl[73 + tbloffset])); real2 v568 = minus(v413, v412); real2 v572 = plus(v412, v413); real2 v940 = plus(v99, v259); real2 v936 = minus(v259, v99); real2 v419 = ctimesminusplus(reverse(v409), tbl[70 + tbloffset], ctimes(v409, tbl[71 + tbloffset])); real2 v47 = load(in, 45 << inShift); real2 v15 = load(in, 13 << inShift); real2 v332 = plus(v15, v47); real2 v328 = minus(v47, v15); real2 v63 = load(in, 61 << inShift); real2 v31 = load(in, 29 << inShift); real2 v327 = reverse(minus(v31, v63)); real2 v333 = plus(v31, v63); real2 v329 = minusplus(v327, v328); real2 v331 = minusplus(uminus(v327), v328); real2 v339 = ctimesminusplus(reverse(v329), tbl[54 + tbloffset], ctimes(v329, tbl[55 + tbloffset])); real2 v487 = reverse(minus(v332, v333)); real2 v493 = plus(v332, v333); real2 v7 = load(in, 5 << inShift); real2 v39 = load(in, 37 << inShift); real2 v172 = plus(v7, v39); real2 v168 = minus(v39, v7); real2 v55 = load(in, 53 << inShift); real2 v23 = load(in, 21 << inShift); real2 v173 = plus(v23, v55); real2 v167 = reverse(minus(v23, v55)); real2 v488 = minus(v173, v172); real2 v492 = plus(v172, v173); real2 v491 = minusplus(uminus(v487), v488); real2 v489 = minusplus(v487, v488); real2 v499 = ctimesminusplus(reverse(v489), tbl[86 + tbloffset], ctimes(v489, tbl[87 + tbloffset])); real2 
v505 = ctimesminusplus(reverse(v491), tbl[88 + tbloffset], ctimes(v491, tbl[89 + tbloffset])); real2 v567 = reverse(minus(v492, v493)); real2 v573 = plus(v492, v493); real2 v571 = minusplus(uminus(v567), v568); real2 v569 = minusplus(v567, v568); real2 v579 = ctimesminusplus(reverse(v569), tbl[102 + tbloffset], ctimes(v569, tbl[103 + tbloffset])); real2 v585 = ctimesminusplus(reverse(v571), tbl[104 + tbloffset], ctimes(v571, tbl[105 + tbloffset])); real2 v739 = plus(v585, v625); real2 v733 = reverse(minus(v585, v625)); real2 v707 = reverse(minus(v579, v619)); real2 v713 = plus(v579, v619); real2 v648 = minus(v573, v572); real2 v652 = plus(v572, v573); real2 v673 = plus(v652, v653); real2 v667 = reverse(minus(v652, v653)); real2 v651 = minusplus(uminus(v647), v648); real2 v649 = minusplus(v647, v648); real2 v659 = ctimesminusplus(reverse(v649), tbl[118 + tbloffset], ctimes(v649, tbl[119 + tbloffset])); real2 v665 = ctimesminusplus(reverse(v651), tbl[120 + tbloffset], ctimes(v651, tbl[121 + tbloffset])); real2 v780 = minus(v499, v419); real2 v784 = plus(v419, v499); real2 v781 = minusplus(v779, v780); real2 v783 = minusplus(uminus(v779), v780); real2 v805 = plus(v784, v785); real2 v799 = reverse(minus(v784, v785)); real2 v862 = plus(v425, v505); real2 v858 = minus(v505, v425); real2 v859 = minusplus(v857, v858); real2 v861 = minusplus(uminus(v857), v858); real2 v875 = ctimesminusplus(reverse(v861), tbl[152 + tbloffset], ctimes(v861, tbl[153 + tbloffset])); real2 v791 = ctimesminusplus(reverse(v781), tbl[138 + tbloffset], ctimes(v781, tbl[139 + tbloffset])); real2 v797 = ctimesminusplus(reverse(v783), tbl[140 + tbloffset], ctimes(v783, tbl[141 + tbloffset])); real2 v883 = plus(v862, v863); real2 v877 = reverse(minus(v862, v863)); real2 v869 = ctimesminusplus(reverse(v859), tbl[150 + tbloffset], ctimes(v859, tbl[151 + tbloffset])); real2 v36 = load(in, 34 << inShift); real2 v4 = load(in, 2 << inShift); real2 v108 = minus(v36, v4); real2 v112 = plus(v4, v36); real2 v52 
= load(in, 50 << inShift); real2 v20 = load(in, 18 << inShift); real2 v113 = plus(v20, v52); real2 v107 = reverse(minus(v20, v52)); real2 v428 = minus(v113, v112); real2 v432 = plus(v112, v113); real2 v12 = load(in, 10 << inShift); real2 v44 = load(in, 42 << inShift); real2 v268 = minus(v44, v12); real2 v272 = plus(v12, v44); real2 v28 = load(in, 26 << inShift); real2 v60 = load(in, 58 << inShift); real2 v267 = reverse(minus(v28, v60)); real2 v273 = plus(v28, v60); real2 v427 = reverse(minus(v272, v273)); real2 v433 = plus(v272, v273); real2 v431 = minusplus(uminus(v427), v428); real2 v429 = minusplus(v427, v428); real2 v439 = ctimesminusplus(reverse(v429), tbl[74 + tbloffset], ctimes(v429, tbl[75 + tbloffset])); real2 v588 = minus(v433, v432); real2 v592 = plus(v432, v433); real2 v40 = load(in, 38 << inShift); real2 v8 = load(in, 6 << inShift); real2 v188 = minus(v40, v8); real2 v192 = plus(v8, v40); real2 v24 = load(in, 22 << inShift); real2 v56 = load(in, 54 << inShift); real2 v187 = reverse(minus(v24, v56)); real2 v193 = plus(v24, v56); real2 v512 = plus(v192, v193); real2 v508 = minus(v193, v192); real2 v32 = load(in, 30 << inShift); real2 v64 = load(in, 62 << inShift); real2 v347 = reverse(minus(v32, v64)); real2 v353 = plus(v32, v64); real2 v48 = load(in, 46 << inShift); real2 v16 = load(in, 14 << inShift); real2 v348 = minus(v48, v16); real2 v352 = plus(v16, v48); real2 v513 = plus(v352, v353); real2 v507 = reverse(minus(v352, v353)); real2 v587 = reverse(minus(v512, v513)); real2 v593 = plus(v512, v513); real2 v633 = plus(v592, v593); real2 v627 = reverse(minus(v592, v593)); real2 v591 = minusplus(uminus(v587), v588); real2 v589 = minusplus(v587, v588); real2 v605 = ctimesminusplus(reverse(v591), tbl[108 + tbloffset], ctimes(v591, tbl[109 + tbloffset])); real2 v599 = ctimesminusplus(reverse(v589), tbl[106 + tbloffset], ctimes(v589, tbl[107 + tbloffset])); real2 v46 = load(in, 44 << inShift); real2 v14 = load(in, 12 << inShift); real2 v312 = plus(v14, v46); 
real2 v308 = minus(v46, v14); real2 v62 = load(in, 60 << inShift); real2 v30 = load(in, 28 << inShift); real2 v313 = plus(v30, v62); real2 v307 = reverse(minus(v30, v62)); real2 v467 = reverse(minus(v312, v313)); real2 v473 = plus(v312, v313); real2 v22 = load(in, 20 << inShift); real2 v54 = load(in, 52 << inShift); real2 v147 = reverse(minus(v22, v54)); real2 v153 = plus(v22, v54); real2 v6 = load(in, 4 << inShift); real2 v38 = load(in, 36 << inShift); real2 v148 = minus(v38, v6); real2 v152 = plus(v6, v38); real2 v472 = plus(v152, v153); real2 v468 = minus(v153, v152); real2 v547 = reverse(minus(v472, v473)); real2 v553 = plus(v472, v473); real2 v10 = load(in, 8 << inShift); real2 v42 = load(in, 40 << inShift); real2 v232 = plus(v10, v42); real2 v228 = minus(v42, v10); real2 v58 = load(in, 56 << inShift); real2 v26 = load(in, 24 << inShift); real2 v233 = plus(v26, v58); real2 v227 = reverse(minus(v26, v58)); real2 v393 = plus(v232, v233); real2 v387 = reverse(minus(v232, v233)); real2 v2 = load(in, 0 << inShift); real2 v34 = load(in, 32 << inShift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v18 = load(in, 16 << inShift); real2 v50 = load(in, 48 << inShift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v18, v50)); real2 v388 = minus(v73, v72); real2 v392 = plus(v72, v73); real2 v548 = minus(v393, v392); real2 v552 = plus(v392, v393); real2 v628 = minus(v553, v552); real2 v632 = plus(v552, v553); real2 v672 = plus(v632, v633); real2 v668 = minus(v633, v632); store(out, 0 << outShift, plus(v672, v673)); real2 v686 = minus(v672, v673); store(out, 32 << outShift, ctimesminusplus(v686, tbl[0 + tbloffset], ctimes(reverse(v686), tbl[1 + tbloffset]))); real2 v669 = minusplus(v667, v668); real2 v671 = minusplus(uminus(v667), v668); store(out, 48 << outShift, ctimesminusplus(reverse(v671), tbl[124 + tbloffset], ctimes(v671, tbl[125 + tbloffset]))); store(out, 16 << outShift, ctimesminusplus(reverse(v669), tbl[122 + tbloffset], ctimes(v669, 
tbl[123 + tbloffset]))); real2 v631 = minusplus(uminus(v627), v628); real2 v629 = minusplus(v627, v628); real2 v639 = ctimesminusplus(reverse(v629), tbl[114 + tbloffset], ctimes(v629, tbl[115 + tbloffset])); store(out, 8 << outShift, plus(v639, v659)); real2 v694 = minus(v639, v659); store(out, 40 << outShift, ctimesminusplus(v694, tbl[0 + tbloffset], ctimes(reverse(v694), tbl[1 + tbloffset]))); real2 v645 = ctimesminusplus(reverse(v631), tbl[116 + tbloffset], ctimes(v631, tbl[117 + tbloffset])); store(out, 24 << outShift, plus(v645, v665)); real2 v700 = minus(v645, v665); store(out, 56 << outShift, ctimesminusplus(v700, tbl[0 + tbloffset], ctimes(reverse(v700), tbl[1 + tbloffset]))); real2 v549 = minusplus(v547, v548); real2 v551 = minusplus(uminus(v547), v548); real2 v559 = ctimesminusplus(reverse(v549), tbl[98 + tbloffset], ctimes(v549, tbl[99 + tbloffset])); real2 v708 = minus(v599, v559); real2 v712 = plus(v559, v599); store(out, 4 << outShift, plus(v712, v713)); real2 v726 = minus(v712, v713); store(out, 36 << outShift, ctimesminusplus(v726, tbl[0 + tbloffset], ctimes(reverse(v726), tbl[1 + tbloffset]))); real2 v711 = minusplus(uminus(v707), v708); real2 v709 = minusplus(v707, v708); store(out, 20 << outShift, ctimesminusplus(reverse(v709), tbl[126 + tbloffset], ctimes(v709, tbl[127 + tbloffset]))); store(out, 52 << outShift, ctimesminusplus(reverse(v711), tbl[128 + tbloffset], ctimes(v711, tbl[129 + tbloffset]))); real2 v565 = ctimesminusplus(reverse(v551), tbl[100 + tbloffset], ctimes(v551, tbl[101 + tbloffset])); real2 v738 = plus(v565, v605); real2 v734 = minus(v605, v565); store(out, 12 << outShift, plus(v738, v739)); real2 v752 = minus(v738, v739); store(out, 44 << outShift, ctimesminusplus(v752, tbl[0 + tbloffset], ctimes(reverse(v752), tbl[1 + tbloffset]))); real2 v737 = minusplus(uminus(v733), v734); store(out, 60 << outShift, ctimesminusplus(reverse(v737), tbl[132 + tbloffset], ctimes(v737, tbl[133 + tbloffset]))); real2 v735 = minusplus(v733, 
v734); store(out, 28 << outShift, ctimesminusplus(reverse(v735), tbl[130 + tbloffset], ctimes(v735, tbl[131 + tbloffset]))); real2 v471 = minusplus(uminus(v467), v468); real2 v469 = minusplus(v467, v468); real2 v479 = ctimesminusplus(reverse(v469), tbl[82 + tbloffset], ctimes(v469, tbl[83 + tbloffset])); real2 v511 = minusplus(uminus(v507), v508); real2 v509 = minusplus(v507, v508); real2 v519 = ctimesminusplus(reverse(v509), tbl[90 + tbloffset], ctimes(v509, tbl[91 + tbloffset])); real2 v765 = plus(v439, v519); real2 v759 = reverse(minus(v439, v519)); real2 v389 = minusplus(v387, v388); real2 v391 = minusplus(uminus(v387), v388); real2 v399 = ctimesminusplus(reverse(v389), tbl[66 + tbloffset], ctimes(v389, tbl[67 + tbloffset])); real2 v764 = plus(v399, v479); real2 v760 = minus(v479, v399); real2 v804 = plus(v764, v765); real2 v800 = minus(v765, v764); store(out, 2 << outShift, plus(v804, v805)); real2 v818 = minus(v804, v805); store(out, 34 << outShift, ctimesminusplus(v818, tbl[0 + tbloffset], ctimes(reverse(v818), tbl[1 + tbloffset]))); real2 v803 = minusplus(uminus(v799), v800); store(out, 50 << outShift, ctimesminusplus(reverse(v803), tbl[144 + tbloffset], ctimes(v803, tbl[145 + tbloffset]))); real2 v801 = minusplus(v799, v800); store(out, 18 << outShift, ctimesminusplus(reverse(v801), tbl[142 + tbloffset], ctimes(v801, tbl[143 + tbloffset]))); real2 v763 = minusplus(uminus(v759), v760); real2 v761 = minusplus(v759, v760); real2 v777 = ctimesminusplus(reverse(v763), tbl[136 + tbloffset], ctimes(v763, tbl[137 + tbloffset])); store(out, 26 << outShift, plus(v777, v797)); real2 v830 = minus(v777, v797); store(out, 58 << outShift, ctimesminusplus(v830, tbl[0 + tbloffset], ctimes(reverse(v830), tbl[1 + tbloffset]))); real2 v771 = ctimesminusplus(reverse(v761), tbl[134 + tbloffset], ctimes(v761, tbl[135 + tbloffset])); store(out, 10 << outShift, plus(v771, v791)); real2 v824 = minus(v771, v791); store(out, 42 << outShift, ctimesminusplus(v824, tbl[0 + tbloffset], 
ctimes(reverse(v824), tbl[1 + tbloffset]))); real2 v445 = ctimesminusplus(reverse(v431), tbl[76 + tbloffset], ctimes(v431, tbl[77 + tbloffset])); real2 v525 = ctimesminusplus(reverse(v511), tbl[92 + tbloffset], ctimes(v511, tbl[93 + tbloffset])); real2 v837 = reverse(minus(v445, v525)); real2 v843 = plus(v445, v525); real2 v485 = ctimesminusplus(reverse(v471), tbl[84 + tbloffset], ctimes(v471, tbl[85 + tbloffset])); real2 v405 = ctimesminusplus(reverse(v391), tbl[68 + tbloffset], ctimes(v391, tbl[69 + tbloffset])); real2 v838 = minus(v485, v405); real2 v842 = plus(v405, v485); real2 v878 = minus(v843, v842); real2 v882 = plus(v842, v843); store(out, 6 << outShift, plus(v882, v883)); real2 v896 = minus(v882, v883); store(out, 38 << outShift, ctimesminusplus(v896, tbl[0 + tbloffset], ctimes(reverse(v896), tbl[1 + tbloffset]))); real2 v881 = minusplus(uminus(v877), v878); store(out, 54 << outShift, ctimesminusplus(reverse(v881), tbl[156 + tbloffset], ctimes(v881, tbl[157 + tbloffset]))); real2 v879 = minusplus(v877, v878); store(out, 22 << outShift, ctimesminusplus(reverse(v879), tbl[154 + tbloffset], ctimes(v879, tbl[155 + tbloffset]))); real2 v841 = minusplus(uminus(v837), v838); real2 v839 = minusplus(v837, v838); real2 v855 = ctimesminusplus(reverse(v841), tbl[148 + tbloffset], ctimes(v841, tbl[149 + tbloffset])); store(out, 30 << outShift, plus(v855, v875)); real2 v908 = minus(v855, v875); store(out, 62 << outShift, ctimesminusplus(v908, tbl[0 + tbloffset], ctimes(reverse(v908), tbl[1 + tbloffset]))); real2 v849 = ctimesminusplus(reverse(v839), tbl[146 + tbloffset], ctimes(v839, tbl[147 + tbloffset])); store(out, 14 << outShift, plus(v849, v869)); real2 v902 = minus(v849, v869); store(out, 46 << outShift, ctimesminusplus(v902, tbl[0 + tbloffset], ctimes(reverse(v902), tbl[1 + tbloffset]))); real2 v151 = minusplus(uminus(v147), v148); real2 v149 = minusplus(v147, v148); real2 v311 = minusplus(uminus(v307), v308); real2 v309 = minusplus(v307, v308); real2 v109 = 
minusplus(v107, v108); real2 v111 = minusplus(uminus(v107), v108); real2 v119 = ctimesminusplus(reverse(v109), tbl[10 + tbloffset], ctimes(v109, tbl[11 + tbloffset])); real2 v269 = minusplus(v267, v268); real2 v271 = minusplus(uminus(v267), v268); real2 v279 = ctimesminusplus(reverse(v269), tbl[42 + tbloffset], ctimes(v269, tbl[43 + tbloffset])); real2 v960 = plus(v119, v279); real2 v956 = minus(v279, v119); real2 v169 = minusplus(v167, v168); real2 v171 = minusplus(uminus(v167), v168); real2 v159 = ctimesminusplus(reverse(v149), tbl[18 + tbloffset], ctimes(v149, tbl[19 + tbloffset])); real2 v319 = ctimesminusplus(reverse(v309), tbl[50 + tbloffset], ctimes(v309, tbl[51 + tbloffset])); real2 v921 = plus(v159, v319); real2 v915 = reverse(minus(v159, v319)); real2 v351 = minusplus(uminus(v347), v348); real2 v349 = minusplus(v347, v348); real2 v359 = ctimesminusplus(reverse(v349), tbl[58 + tbloffset], ctimes(v349, tbl[59 + tbloffset])); real2 v191 = minusplus(uminus(v187), v188); real2 v189 = minusplus(v187, v188); real2 v199 = ctimesminusplus(reverse(v189), tbl[26 + tbloffset], ctimes(v189, tbl[27 + tbloffset])); real2 v961 = plus(v199, v359); real2 v955 = reverse(minus(v199, v359)); real2 v995 = reverse(minus(v960, v961)); real2 v1001 = plus(v960, v961); real2 v179 = ctimesminusplus(reverse(v169), tbl[22 + tbloffset], ctimes(v169, tbl[23 + tbloffset])); real2 v941 = plus(v179, v339); real2 v935 = reverse(minus(v179, v339)); real2 v1016 = minus(v941, v940); real2 v1020 = plus(v940, v941); real2 v71 = minusplus(uminus(v67), v68); real2 v69 = minusplus(v67, v68); real2 v79 = ctimesminusplus(reverse(v69), tbl[2 + tbloffset], ctimes(v69, tbl[3 + tbloffset])); real2 v1041 = plus(v1020, v1021); real2 v1035 = reverse(minus(v1020, v1021)); real2 v229 = minusplus(v227, v228); real2 v231 = minusplus(uminus(v227), v228); real2 v239 = ctimesminusplus(reverse(v229), tbl[34 + tbloffset], ctimes(v229, tbl[35 + tbloffset])); real2 v920 = plus(v79, v239); real2 v916 = minus(v239, 
v79); real2 v996 = minus(v921, v920); real2 v1000 = plus(v920, v921); real2 v1040 = plus(v1000, v1001); real2 v1036 = minus(v1001, v1000); store(out, 1 << outShift, plus(v1040, v1041)); real2 v1054 = minus(v1040, v1041); store(out, 33 << outShift, ctimesminusplus(v1054, tbl[0 + tbloffset], ctimes(reverse(v1054), tbl[1 + tbloffset]))); real2 v1037 = minusplus(v1035, v1036); real2 v1039 = minusplus(uminus(v1035), v1036); store(out, 49 << outShift, ctimesminusplus(reverse(v1039), tbl[184 + tbloffset], ctimes(v1039, tbl[185 + tbloffset]))); store(out, 17 << outShift, ctimesminusplus(reverse(v1037), tbl[182 + tbloffset], ctimes(v1037, tbl[183 + tbloffset]))); real2 v1017 = minusplus(v1015, v1016); real2 v1019 = minusplus(uminus(v1015), v1016); real2 v1033 = ctimesminusplus(reverse(v1019), tbl[180 + tbloffset], ctimes(v1019, tbl[181 + tbloffset])); real2 v997 = minusplus(v995, v996); real2 v999 = minusplus(uminus(v995), v996); real2 v1013 = ctimesminusplus(reverse(v999), tbl[176 + tbloffset], ctimes(v999, tbl[177 + tbloffset])); store(out, 25 << outShift, plus(v1013, v1033)); real2 v1066 = minus(v1013, v1033); store(out, 57 << outShift, ctimesminusplus(v1066, tbl[0 + tbloffset], ctimes(reverse(v1066), tbl[1 + tbloffset]))); real2 v1027 = ctimesminusplus(reverse(v1017), tbl[178 + tbloffset], ctimes(v1017, tbl[179 + tbloffset])); real2 v1007 = ctimesminusplus(reverse(v997), tbl[174 + tbloffset], ctimes(v997, tbl[175 + tbloffset])); store(out, 9 << outShift, plus(v1007, v1027)); real2 v1060 = minus(v1007, v1027); store(out, 41 << outShift, ctimesminusplus(v1060, tbl[0 + tbloffset], ctimes(reverse(v1060), tbl[1 + tbloffset]))); real2 v937 = minusplus(v935, v936); real2 v939 = minusplus(uminus(v935), v936); real2 v959 = minusplus(uminus(v955), v956); real2 v957 = minusplus(v955, v956); real2 v967 = ctimesminusplus(reverse(v957), tbl[166 + tbloffset], ctimes(v957, tbl[167 + tbloffset])); real2 v947 = ctimesminusplus(reverse(v937), tbl[162 + tbloffset], ctimes(v937, tbl[163 + 
tbloffset])); real2 v919 = minusplus(uminus(v915), v916); real2 v917 = minusplus(v915, v916); real2 v1079 = plus(v947, v987); real2 v1073 = reverse(minus(v947, v987)); real2 v927 = ctimesminusplus(reverse(v917), tbl[158 + tbloffset], ctimes(v917, tbl[159 + tbloffset])); real2 v1074 = minus(v967, v927); real2 v1078 = plus(v927, v967); store(out, 5 << outShift, plus(v1078, v1079)); real2 v1092 = minus(v1078, v1079); store(out, 37 << outShift, ctimesminusplus(v1092, tbl[0 + tbloffset], ctimes(reverse(v1092), tbl[1 + tbloffset]))); real2 v1075 = minusplus(v1073, v1074); store(out, 21 << outShift, ctimesminusplus(reverse(v1075), tbl[186 + tbloffset], ctimes(v1075, tbl[187 + tbloffset]))); real2 v1077 = minusplus(uminus(v1073), v1074); store(out, 53 << outShift, ctimesminusplus(reverse(v1077), tbl[188 + tbloffset], ctimes(v1077, tbl[189 + tbloffset]))); real2 v953 = ctimesminusplus(reverse(v939), tbl[164 + tbloffset], ctimes(v939, tbl[165 + tbloffset])); real2 v1099 = reverse(minus(v953, v993)); real2 v1105 = plus(v953, v993); real2 v973 = ctimesminusplus(reverse(v959), tbl[168 + tbloffset], ctimes(v959, tbl[169 + tbloffset])); real2 v933 = ctimesminusplus(reverse(v919), tbl[160 + tbloffset], ctimes(v919, tbl[161 + tbloffset])); real2 v1104 = plus(v933, v973); real2 v1100 = minus(v973, v933); store(out, 13 << outShift, plus(v1104, v1105)); real2 v1118 = minus(v1104, v1105); store(out, 45 << outShift, ctimesminusplus(v1118, tbl[0 + tbloffset], ctimes(reverse(v1118), tbl[1 + tbloffset]))); real2 v1101 = minusplus(v1099, v1100); store(out, 29 << outShift, ctimesminusplus(reverse(v1101), tbl[190 + tbloffset], ctimes(v1101, tbl[191 + tbloffset]))); real2 v1103 = minusplus(uminus(v1099), v1100); store(out, 61 << outShift, ctimesminusplus(reverse(v1103), tbl[192 + tbloffset], ctimes(v1103, tbl[193 + tbloffset]))); real2 v345 = ctimesminusplus(reverse(v331), tbl[56 + tbloffset], ctimes(v331, tbl[57 + tbloffset])); real2 v325 = ctimesminusplus(reverse(v311), tbl[52 + tbloffset], 
ctimes(v311, tbl[53 + tbloffset])); real2 v265 = ctimesminusplus(reverse(v251), tbl[40 + tbloffset], ctimes(v251, tbl[41 + tbloffset])); real2 v185 = ctimesminusplus(reverse(v171), tbl[24 + tbloffset], ctimes(v171, tbl[25 + tbloffset])); real2 v165 = ctimesminusplus(reverse(v151), tbl[20 + tbloffset], ctimes(v151, tbl[21 + tbloffset])); real2 v1131 = plus(v165, v325); real2 v1125 = reverse(minus(v165, v325)); real2 v1151 = plus(v185, v345); real2 v1145 = reverse(minus(v185, v345)); real2 v105 = ctimesminusplus(reverse(v91), tbl[8 + tbloffset], ctimes(v91, tbl[9 + tbloffset])); real2 v1150 = plus(v105, v265); real2 v1146 = minus(v265, v105); real2 v1226 = minus(v1151, v1150); real2 v1230 = plus(v1150, v1151); real2 v1231 = plus(v1190, v1191); real2 v1225 = reverse(minus(v1190, v1191)); real2 v1245 = reverse(minus(v1230, v1231)); real2 v1251 = plus(v1230, v1231); real2 v365 = ctimesminusplus(reverse(v351), tbl[60 + tbloffset], ctimes(v351, tbl[61 + tbloffset])); real2 v285 = ctimesminusplus(reverse(v271), tbl[44 + tbloffset], ctimes(v271, tbl[45 + tbloffset])); real2 v205 = ctimesminusplus(reverse(v191), tbl[28 + tbloffset], ctimes(v191, tbl[29 + tbloffset])); real2 v1171 = plus(v205, v365); real2 v1165 = reverse(minus(v205, v365)); real2 v125 = ctimesminusplus(reverse(v111), tbl[12 + tbloffset], ctimes(v111, tbl[13 + tbloffset])); real2 v85 = ctimesminusplus(reverse(v71), tbl[4 + tbloffset], ctimes(v71, tbl[5 + tbloffset])); real2 v245 = ctimesminusplus(reverse(v231), tbl[36 + tbloffset], ctimes(v231, tbl[37 + tbloffset])); real2 v1126 = minus(v245, v85); real2 v1130 = plus(v85, v245); real2 v1210 = plus(v1130, v1131); real2 v1206 = minus(v1131, v1130); real2 v1166 = minus(v285, v125); real2 v1170 = plus(v125, v285); real2 v1211 = plus(v1170, v1171); real2 v1205 = reverse(minus(v1170, v1171)); real2 v1246 = minus(v1211, v1210); real2 v1250 = plus(v1210, v1211); store(out, 3 << outShift, plus(v1250, v1251)); real2 v1264 = minus(v1250, v1251); store(out, 35 << 
outShift, ctimesminusplus(v1264, tbl[0 + tbloffset], ctimes(reverse(v1264), tbl[1 + tbloffset]))); real2 v1247 = minusplus(v1245, v1246); real2 v1249 = minusplus(uminus(v1245), v1246); store(out, 19 << outShift, ctimesminusplus(reverse(v1247), tbl[218 + tbloffset], ctimes(v1247, tbl[219 + tbloffset]))); store(out, 51 << outShift, ctimesminusplus(reverse(v1249), tbl[220 + tbloffset], ctimes(v1249, tbl[221 + tbloffset]))); real2 v1229 = minusplus(uminus(v1225), v1226); real2 v1227 = minusplus(v1225, v1226); real2 v1207 = minusplus(v1205, v1206); real2 v1209 = minusplus(uminus(v1205), v1206); real2 v1237 = ctimesminusplus(reverse(v1227), tbl[214 + tbloffset], ctimes(v1227, tbl[215 + tbloffset])); real2 v1217 = ctimesminusplus(reverse(v1207), tbl[210 + tbloffset], ctimes(v1207, tbl[211 + tbloffset])); store(out, 11 << outShift, plus(v1217, v1237)); real2 v1270 = minus(v1217, v1237); store(out, 43 << outShift, ctimesminusplus(v1270, tbl[0 + tbloffset], ctimes(reverse(v1270), tbl[1 + tbloffset]))); real2 v1223 = ctimesminusplus(reverse(v1209), tbl[212 + tbloffset], ctimes(v1209, tbl[213 + tbloffset])); real2 v1243 = ctimesminusplus(reverse(v1229), tbl[216 + tbloffset], ctimes(v1229, tbl[217 + tbloffset])); store(out, 27 << outShift, plus(v1223, v1243)); real2 v1276 = minus(v1223, v1243); store(out, 59 << outShift, ctimesminusplus(v1276, tbl[0 + tbloffset], ctimes(reverse(v1276), tbl[1 + tbloffset]))); real2 v1189 = minusplus(uminus(v1185), v1186); real2 v1187 = minusplus(v1185, v1186); real2 v1129 = minusplus(uminus(v1125), v1126); real2 v1127 = minusplus(v1125, v1126); real2 v1147 = minusplus(v1145, v1146); real2 v1149 = minusplus(uminus(v1145), v1146); real2 v1167 = minusplus(v1165, v1166); real2 v1169 = minusplus(uminus(v1165), v1166); real2 v1143 = ctimesminusplus(reverse(v1129), tbl[196 + tbloffset], ctimes(v1129, tbl[197 + tbloffset])); real2 v1163 = ctimesminusplus(reverse(v1149), tbl[200 + tbloffset], ctimes(v1149, tbl[201 + tbloffset])); real2 v1203 = 
ctimesminusplus(reverse(v1189), tbl[208 + tbloffset], ctimes(v1189, tbl[209 + tbloffset])); real2 v1315 = plus(v1163, v1203); real2 v1309 = reverse(minus(v1163, v1203)); real2 v1183 = ctimesminusplus(reverse(v1169), tbl[204 + tbloffset], ctimes(v1169, tbl[205 + tbloffset])); real2 v1314 = plus(v1143, v1183); real2 v1310 = minus(v1183, v1143); store(out, 15 << outShift, plus(v1314, v1315)); real2 v1328 = minus(v1314, v1315); store(out, 47 << outShift, ctimesminusplus(v1328, tbl[0 + tbloffset], ctimes(reverse(v1328), tbl[1 + tbloffset]))); real2 v1311 = minusplus(v1309, v1310); store(out, 31 << outShift, ctimesminusplus(reverse(v1311), tbl[226 + tbloffset], ctimes(v1311, tbl[227 + tbloffset]))); real2 v1313 = minusplus(uminus(v1309), v1310); store(out, 63 << outShift, ctimesminusplus(reverse(v1313), tbl[228 + tbloffset], ctimes(v1313, tbl[229 + tbloffset]))); real2 v1177 = ctimesminusplus(reverse(v1167), tbl[202 + tbloffset], ctimes(v1167, tbl[203 + tbloffset])); real2 v1137 = ctimesminusplus(reverse(v1127), tbl[194 + tbloffset], ctimes(v1127, tbl[195 + tbloffset])); real2 v1197 = ctimesminusplus(reverse(v1187), tbl[206 + tbloffset], ctimes(v1187, tbl[207 + tbloffset])); real2 v1157 = ctimesminusplus(reverse(v1147), tbl[198 + tbloffset], ctimes(v1147, tbl[199 + tbloffset])); real2 v1283 = reverse(minus(v1157, v1197)); real2 v1289 = plus(v1157, v1197); real2 v1288 = plus(v1137, v1177); real2 v1284 = minus(v1177, v1137); store(out, 7 << outShift, plus(v1288, v1289)); real2 v1302 = minus(v1288, v1289); store(out, 39 << outShift, ctimesminusplus(v1302, tbl[0 + tbloffset], ctimes(reverse(v1302), tbl[1 + tbloffset]))); real2 v1285 = minusplus(v1283, v1284); real2 v1287 = minusplus(uminus(v1283), v1284); store(out, 55 << outShift, ctimesminusplus(reverse(v1287), tbl[224 + tbloffset], ctimes(v1287, tbl[225 + tbloffset]))); store(out, 23 << outShift, ctimesminusplus(reverse(v1285), tbl[222 + tbloffset], ctimes(v1285, tbl[223 + tbloffset]))); // Pres : 17339 } } ALIGNED(8192) 
/*
 * but64b_%CONFIG%_%ISA% -- fully unrolled butterfly kernel that consumes 64
 * real2 inputs and produces 64 real2 outputs per loop iteration.
 *
 * %CONFIG% and %ISA% are literal placeholders in this text; presumably they
 * are substituted by a code generator before compilation -- TODO confirm.
 *
 * Parameters:
 *   out0     - base of the output buffer. Iteration i writes through
 *              out = out0 + q[i].
 *   q        - table of per-iteration output offsets (counted in `real`
 *              elements), indexed by the loop counter i.
 *   outShift - output n (n = 0..63) is stored at offset (n << outShift);
 *              also used when deriving tbloffset.
 *   in0      - base of the input buffer. Iteration i reads through
 *              in = in0 + 2 * (i << LOG2VECWIDTH).
 *   inShift  - input n (n = 0..63) is loaded from offset (n << inShift);
 *              also fixes the trip count, k = 1 << (inShift - LOG2VECWIDTH).
 *   tbl      - coefficient table. Entries are read as tbl[c + tbloffset],
 *              with constant c ranging from 0 to 229 and
 *              tbloffset = K * ((i << LOG2VECWIDTH) >> outShift).
 *   K        - multiplier used in the tbloffset computation above.
 *
 * Body: each iteration loads the 64 inputs, combines them through the
 * plus / minus / minusplus / uminus / reverse helpers and the table
 * multiplications ctimes / ctimesminusplus, and issues 64 stores (every
 * index 0..63 is written exactly once). Nothing is returned; results are
 * only visible through `out0`.
 *
 * The loop over i is annotated with "#pragma omp parallel for".
 *
 * NOTE(review): load, store, real2 and the arithmetic helpers are defined
 * outside this chunk; their exact semantics (including the unit of the
 * load/store offset argument) cannot be confirmed from here.
 * NOTE(review): the "b" suffix presumably marks the backward transform.
 * The visible evidence is that reverse(minus(a, b)) operands (e.g. for
 * v1309 and v1283) are in the opposite order from the same statements in
 * the tail of the preceding kernel -- confirm against the generator.
 */
void but64b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); // Pres : 30254 real2 v37 = load(in, 35 << inShift); real2 v5 = load(in, 3 << inShift); real2 v132 = plus(v5, v37); real2 v128 = minus(v37, v5); real2 v21 = load(in, 19 << inShift); real2 v53 = load(in, 51 << inShift); real2 v133 = plus(v21, v53); real2 v127 = reverse(minus(v53, v21)); real2 v131 = minusplus(uminus(v127), v128); real2 v129 = minusplus(v127, v128); real2 v139 = ctimesminusplus(reverse(v129), tbl[14 + tbloffset], ctimes(v129, tbl[15 + tbloffset])); real2 v145 = ctimesminusplus(reverse(v131), tbl[16 + tbloffset], ctimes(v131, tbl[17 + tbloffset])); real2 v448 = minus(v133, v132); real2 v452 = plus(v132, v133); real2 v45 = load(in, 43 << inShift); real2 v13 = load(in, 11 << inShift); real2 v292 = plus(v13, v45); real2 v288 = minus(v45, v13); real2 v29 = load(in, 27 << inShift); real2 v61 = load(in, 59 << inShift); real2 v293 = plus(v29, v61); real2 v287 = reverse(minus(v61, v29)); real2 v291 = minusplus(uminus(v287), v288); real2 v289 = minusplus(v287, v288); real2 v299 = ctimesminusplus(reverse(v289), tbl[46 + tbloffset], ctimes(v289, tbl[47 + tbloffset])); real2 v453 = plus(v292, v293); real2 v447 = reverse(minus(v293, v292)); real2 v608 = minus(v453, v452); real2 v612 = plus(v452, v453); real2 v980 = plus(v139, v299); real2 v976 = minus(v299, v139); real2 v449 = minusplus(v447, v448); real2 v451 = minusplus(uminus(v447), v448); real2 v465 = ctimesminusplus(reverse(v451), tbl[80 + tbloffset], ctimes(v451, tbl[81 + tbloffset])); real2 v305 = ctimesminusplus(reverse(v291), tbl[48 + tbloffset], ctimes(v291, tbl[49 + tbloffset])); real2 v1186 = minus(v305, v145); real2 
v1190 = plus(v145, v305); real2 v459 = ctimesminusplus(reverse(v449), tbl[78 + tbloffset], ctimes(v449, tbl[79 + tbloffset])); real2 v25 = load(in, 23 << inShift); real2 v57 = load(in, 55 << inShift); real2 v207 = reverse(minus(v57, v25)); real2 v213 = plus(v25, v57); real2 v9 = load(in, 7 << inShift); real2 v41 = load(in, 39 << inShift); real2 v212 = plus(v9, v41); real2 v208 = minus(v41, v9); real2 v528 = minus(v213, v212); real2 v532 = plus(v212, v213); real2 v209 = minusplus(v207, v208); real2 v211 = minusplus(uminus(v207), v208); real2 v225 = ctimesminusplus(reverse(v211), tbl[32 + tbloffset], ctimes(v211, tbl[33 + tbloffset])); real2 v219 = ctimesminusplus(reverse(v209), tbl[30 + tbloffset], ctimes(v209, tbl[31 + tbloffset])); real2 v17 = load(in, 15 << inShift); real2 v49 = load(in, 47 << inShift); real2 v368 = minus(v49, v17); real2 v372 = plus(v17, v49); real2 v33 = load(in, 31 << inShift); real2 v65 = load(in, 63 << inShift); real2 v367 = reverse(minus(v65, v33)); real2 v373 = plus(v33, v65); real2 v369 = minusplus(v367, v368); real2 v371 = minusplus(uminus(v367), v368); real2 v533 = plus(v372, v373); real2 v527 = reverse(minus(v373, v372)); real2 v607 = reverse(minus(v533, v532)); real2 v613 = plus(v532, v533); real2 v529 = minusplus(v527, v528); real2 v531 = minusplus(uminus(v527), v528); real2 v545 = ctimesminusplus(reverse(v531), tbl[96 + tbloffset], ctimes(v531, tbl[97 + tbloffset])); real2 v653 = plus(v612, v613); real2 v647 = reverse(minus(v613, v612)); real2 v609 = minusplus(v607, v608); real2 v611 = minusplus(uminus(v607), v608); real2 v863 = plus(v465, v545); real2 v857 = reverse(minus(v545, v465)); real2 v539 = ctimesminusplus(reverse(v529), tbl[94 + tbloffset], ctimes(v529, tbl[95 + tbloffset])); real2 v385 = ctimesminusplus(reverse(v371), tbl[64 + tbloffset], ctimes(v371, tbl[65 + tbloffset])); real2 v619 = ctimesminusplus(reverse(v609), tbl[110 + tbloffset], ctimes(v609, tbl[111 + tbloffset])); real2 v1191 = plus(v225, v385); real2 v1185 = 
reverse(minus(v385, v225)); real2 v779 = reverse(minus(v539, v459)); real2 v785 = plus(v459, v539); real2 v625 = ctimesminusplus(reverse(v611), tbl[112 + tbloffset], ctimes(v611, tbl[113 + tbloffset])); real2 v379 = ctimesminusplus(reverse(v369), tbl[62 + tbloffset], ctimes(v369, tbl[63 + tbloffset])); real2 v975 = reverse(minus(v379, v219)); real2 v981 = plus(v219, v379); real2 v977 = minusplus(v975, v976); real2 v979 = minusplus(uminus(v975), v976); real2 v987 = ctimesminusplus(reverse(v977), tbl[170 + tbloffset], ctimes(v977, tbl[171 + tbloffset])); real2 v993 = ctimesminusplus(reverse(v979), tbl[172 + tbloffset], ctimes(v979, tbl[173 + tbloffset])); real2 v1015 = reverse(minus(v981, v980)); real2 v1021 = plus(v980, v981); real2 v11 = load(in, 9 << inShift); real2 v43 = load(in, 41 << inShift); real2 v248 = minus(v43, v11); real2 v252 = plus(v11, v43); real2 v59 = load(in, 57 << inShift); real2 v27 = load(in, 25 << inShift); real2 v253 = plus(v27, v59); real2 v247 = reverse(minus(v59, v27)); real2 v413 = plus(v252, v253); real2 v407 = reverse(minus(v253, v252)); real2 v249 = minusplus(v247, v248); real2 v251 = minusplus(uminus(v247), v248); real2 v259 = ctimesminusplus(reverse(v249), tbl[38 + tbloffset], ctimes(v249, tbl[39 + tbloffset])); real2 v35 = load(in, 33 << inShift); real2 v3 = load(in, 1 << inShift); real2 v92 = plus(v3, v35); real2 v88 = minus(v35, v3); real2 v51 = load(in, 49 << inShift); real2 v19 = load(in, 17 << inShift); real2 v87 = reverse(minus(v51, v19)); real2 v93 = plus(v19, v51); real2 v412 = plus(v92, v93); real2 v408 = minus(v93, v92); real2 v411 = minusplus(uminus(v407), v408); real2 v409 = minusplus(v407, v408); real2 v91 = minusplus(uminus(v87), v88); real2 v89 = minusplus(v87, v88); real2 v99 = ctimesminusplus(reverse(v89), tbl[6 + tbloffset], ctimes(v89, tbl[7 + tbloffset])); real2 v425 = ctimesminusplus(reverse(v411), tbl[72 + tbloffset], ctimes(v411, tbl[73 + tbloffset])); real2 v568 = minus(v413, v412); real2 v572 = plus(v412, 
v413); real2 v940 = plus(v99, v259); real2 v936 = minus(v259, v99); real2 v419 = ctimesminusplus(reverse(v409), tbl[70 + tbloffset], ctimes(v409, tbl[71 + tbloffset])); real2 v47 = load(in, 45 << inShift); real2 v15 = load(in, 13 << inShift); real2 v332 = plus(v15, v47); real2 v328 = minus(v47, v15); real2 v63 = load(in, 61 << inShift); real2 v31 = load(in, 29 << inShift); real2 v327 = reverse(minus(v63, v31)); real2 v333 = plus(v31, v63); real2 v329 = minusplus(v327, v328); real2 v331 = minusplus(uminus(v327), v328); real2 v339 = ctimesminusplus(reverse(v329), tbl[54 + tbloffset], ctimes(v329, tbl[55 + tbloffset])); real2 v487 = reverse(minus(v333, v332)); real2 v493 = plus(v332, v333); real2 v7 = load(in, 5 << inShift); real2 v39 = load(in, 37 << inShift); real2 v172 = plus(v7, v39); real2 v168 = minus(v39, v7); real2 v55 = load(in, 53 << inShift); real2 v23 = load(in, 21 << inShift); real2 v173 = plus(v23, v55); real2 v167 = reverse(minus(v55, v23)); real2 v488 = minus(v173, v172); real2 v492 = plus(v172, v173); real2 v491 = minusplus(uminus(v487), v488); real2 v489 = minusplus(v487, v488); real2 v499 = ctimesminusplus(reverse(v489), tbl[86 + tbloffset], ctimes(v489, tbl[87 + tbloffset])); real2 v505 = ctimesminusplus(reverse(v491), tbl[88 + tbloffset], ctimes(v491, tbl[89 + tbloffset])); real2 v567 = reverse(minus(v493, v492)); real2 v573 = plus(v492, v493); real2 v571 = minusplus(uminus(v567), v568); real2 v569 = minusplus(v567, v568); real2 v579 = ctimesminusplus(reverse(v569), tbl[102 + tbloffset], ctimes(v569, tbl[103 + tbloffset])); real2 v585 = ctimesminusplus(reverse(v571), tbl[104 + tbloffset], ctimes(v571, tbl[105 + tbloffset])); real2 v739 = plus(v585, v625); real2 v733 = reverse(minus(v625, v585)); real2 v707 = reverse(minus(v619, v579)); real2 v713 = plus(v579, v619); real2 v648 = minus(v573, v572); real2 v652 = plus(v572, v573); real2 v673 = plus(v652, v653); real2 v667 = reverse(minus(v653, v652)); real2 v651 = minusplus(uminus(v647), v648); real2 
v649 = minusplus(v647, v648); real2 v659 = ctimesminusplus(reverse(v649), tbl[118 + tbloffset], ctimes(v649, tbl[119 + tbloffset])); real2 v665 = ctimesminusplus(reverse(v651), tbl[120 + tbloffset], ctimes(v651, tbl[121 + tbloffset])); real2 v780 = minus(v499, v419); real2 v784 = plus(v419, v499); real2 v781 = minusplus(v779, v780); real2 v783 = minusplus(uminus(v779), v780); real2 v805 = plus(v784, v785); real2 v799 = reverse(minus(v785, v784)); real2 v862 = plus(v425, v505); real2 v858 = minus(v505, v425); real2 v859 = minusplus(v857, v858); real2 v861 = minusplus(uminus(v857), v858); real2 v875 = ctimesminusplus(reverse(v861), tbl[152 + tbloffset], ctimes(v861, tbl[153 + tbloffset])); real2 v791 = ctimesminusplus(reverse(v781), tbl[138 + tbloffset], ctimes(v781, tbl[139 + tbloffset])); real2 v797 = ctimesminusplus(reverse(v783), tbl[140 + tbloffset], ctimes(v783, tbl[141 + tbloffset])); real2 v883 = plus(v862, v863); real2 v877 = reverse(minus(v863, v862)); real2 v869 = ctimesminusplus(reverse(v859), tbl[150 + tbloffset], ctimes(v859, tbl[151 + tbloffset])); real2 v36 = load(in, 34 << inShift); real2 v4 = load(in, 2 << inShift); real2 v108 = minus(v36, v4); real2 v112 = plus(v4, v36); real2 v52 = load(in, 50 << inShift); real2 v20 = load(in, 18 << inShift); real2 v113 = plus(v20, v52); real2 v107 = reverse(minus(v52, v20)); real2 v428 = minus(v113, v112); real2 v432 = plus(v112, v113); real2 v12 = load(in, 10 << inShift); real2 v44 = load(in, 42 << inShift); real2 v268 = minus(v44, v12); real2 v272 = plus(v12, v44); real2 v28 = load(in, 26 << inShift); real2 v60 = load(in, 58 << inShift); real2 v267 = reverse(minus(v60, v28)); real2 v273 = plus(v28, v60); real2 v427 = reverse(minus(v273, v272)); real2 v433 = plus(v272, v273); real2 v431 = minusplus(uminus(v427), v428); real2 v429 = minusplus(v427, v428); real2 v439 = ctimesminusplus(reverse(v429), tbl[74 + tbloffset], ctimes(v429, tbl[75 + tbloffset])); real2 v588 = minus(v433, v432); real2 v592 = plus(v432, 
v433); real2 v40 = load(in, 38 << inShift); real2 v8 = load(in, 6 << inShift); real2 v188 = minus(v40, v8); real2 v192 = plus(v8, v40); real2 v24 = load(in, 22 << inShift); real2 v56 = load(in, 54 << inShift); real2 v187 = reverse(minus(v56, v24)); real2 v193 = plus(v24, v56); real2 v512 = plus(v192, v193); real2 v508 = minus(v193, v192); real2 v32 = load(in, 30 << inShift); real2 v64 = load(in, 62 << inShift); real2 v347 = reverse(minus(v64, v32)); real2 v353 = plus(v32, v64); real2 v48 = load(in, 46 << inShift); real2 v16 = load(in, 14 << inShift); real2 v348 = minus(v48, v16); real2 v352 = plus(v16, v48); real2 v513 = plus(v352, v353); real2 v507 = reverse(minus(v353, v352)); real2 v587 = reverse(minus(v513, v512)); real2 v593 = plus(v512, v513); real2 v633 = plus(v592, v593); real2 v627 = reverse(minus(v593, v592)); real2 v591 = minusplus(uminus(v587), v588); real2 v589 = minusplus(v587, v588); real2 v605 = ctimesminusplus(reverse(v591), tbl[108 + tbloffset], ctimes(v591, tbl[109 + tbloffset])); real2 v599 = ctimesminusplus(reverse(v589), tbl[106 + tbloffset], ctimes(v589, tbl[107 + tbloffset])); real2 v46 = load(in, 44 << inShift); real2 v14 = load(in, 12 << inShift); real2 v312 = plus(v14, v46); real2 v308 = minus(v46, v14); real2 v62 = load(in, 60 << inShift); real2 v30 = load(in, 28 << inShift); real2 v313 = plus(v30, v62); real2 v307 = reverse(minus(v62, v30)); real2 v467 = reverse(minus(v313, v312)); real2 v473 = plus(v312, v313); real2 v22 = load(in, 20 << inShift); real2 v54 = load(in, 52 << inShift); real2 v147 = reverse(minus(v54, v22)); real2 v153 = plus(v22, v54); real2 v6 = load(in, 4 << inShift); real2 v38 = load(in, 36 << inShift); real2 v148 = minus(v38, v6); real2 v152 = plus(v6, v38); real2 v472 = plus(v152, v153); real2 v468 = minus(v153, v152); real2 v547 = reverse(minus(v473, v472)); real2 v553 = plus(v472, v473); real2 v10 = load(in, 8 << inShift); real2 v42 = load(in, 40 << inShift); real2 v232 = plus(v10, v42); real2 v228 = minus(v42, 
v10); real2 v58 = load(in, 56 << inShift); real2 v26 = load(in, 24 << inShift); real2 v233 = plus(v26, v58); real2 v227 = reverse(minus(v58, v26)); real2 v393 = plus(v232, v233); real2 v387 = reverse(minus(v233, v232)); real2 v2 = load(in, 0 << inShift); real2 v34 = load(in, 32 << inShift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v18 = load(in, 16 << inShift); real2 v50 = load(in, 48 << inShift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v50, v18)); real2 v388 = minus(v73, v72); real2 v392 = plus(v72, v73); real2 v548 = minus(v393, v392); real2 v552 = plus(v392, v393); real2 v628 = minus(v553, v552); real2 v632 = plus(v552, v553); real2 v672 = plus(v632, v633); real2 v668 = minus(v633, v632); store(out, 0 << outShift, plus(v672, v673)); real2 v686 = minus(v672, v673); store(out, 32 << outShift, ctimesminusplus(v686, tbl[0 + tbloffset], ctimes(reverse(v686), tbl[1 + tbloffset]))); real2 v669 = minusplus(v667, v668); real2 v671 = minusplus(uminus(v667), v668); store(out, 48 << outShift, ctimesminusplus(reverse(v671), tbl[124 + tbloffset], ctimes(v671, tbl[125 + tbloffset]))); store(out, 16 << outShift, ctimesminusplus(reverse(v669), tbl[122 + tbloffset], ctimes(v669, tbl[123 + tbloffset]))); real2 v631 = minusplus(uminus(v627), v628); real2 v629 = minusplus(v627, v628); real2 v639 = ctimesminusplus(reverse(v629), tbl[114 + tbloffset], ctimes(v629, tbl[115 + tbloffset])); store(out, 8 << outShift, plus(v639, v659)); real2 v694 = minus(v639, v659); store(out, 40 << outShift, ctimesminusplus(v694, tbl[0 + tbloffset], ctimes(reverse(v694), tbl[1 + tbloffset]))); real2 v645 = ctimesminusplus(reverse(v631), tbl[116 + tbloffset], ctimes(v631, tbl[117 + tbloffset])); store(out, 24 << outShift, plus(v645, v665)); real2 v700 = minus(v645, v665); store(out, 56 << outShift, ctimesminusplus(v700, tbl[0 + tbloffset], ctimes(reverse(v700), tbl[1 + tbloffset]))); real2 v549 = minusplus(v547, v548); real2 v551 = minusplus(uminus(v547), v548); real2 
v559 = ctimesminusplus(reverse(v549), tbl[98 + tbloffset], ctimes(v549, tbl[99 + tbloffset])); real2 v708 = minus(v599, v559); real2 v712 = plus(v559, v599); store(out, 4 << outShift, plus(v712, v713)); real2 v726 = minus(v712, v713); store(out, 36 << outShift, ctimesminusplus(v726, tbl[0 + tbloffset], ctimes(reverse(v726), tbl[1 + tbloffset]))); real2 v711 = minusplus(uminus(v707), v708); real2 v709 = minusplus(v707, v708); store(out, 20 << outShift, ctimesminusplus(reverse(v709), tbl[126 + tbloffset], ctimes(v709, tbl[127 + tbloffset]))); store(out, 52 << outShift, ctimesminusplus(reverse(v711), tbl[128 + tbloffset], ctimes(v711, tbl[129 + tbloffset]))); real2 v565 = ctimesminusplus(reverse(v551), tbl[100 + tbloffset], ctimes(v551, tbl[101 + tbloffset])); real2 v738 = plus(v565, v605); real2 v734 = minus(v605, v565); store(out, 12 << outShift, plus(v738, v739)); real2 v752 = minus(v738, v739); store(out, 44 << outShift, ctimesminusplus(v752, tbl[0 + tbloffset], ctimes(reverse(v752), tbl[1 + tbloffset]))); real2 v737 = minusplus(uminus(v733), v734); store(out, 60 << outShift, ctimesminusplus(reverse(v737), tbl[132 + tbloffset], ctimes(v737, tbl[133 + tbloffset]))); real2 v735 = minusplus(v733, v734); store(out, 28 << outShift, ctimesminusplus(reverse(v735), tbl[130 + tbloffset], ctimes(v735, tbl[131 + tbloffset]))); real2 v471 = minusplus(uminus(v467), v468); real2 v469 = minusplus(v467, v468); real2 v479 = ctimesminusplus(reverse(v469), tbl[82 + tbloffset], ctimes(v469, tbl[83 + tbloffset])); real2 v511 = minusplus(uminus(v507), v508); real2 v509 = minusplus(v507, v508); real2 v519 = ctimesminusplus(reverse(v509), tbl[90 + tbloffset], ctimes(v509, tbl[91 + tbloffset])); real2 v765 = plus(v439, v519); real2 v759 = reverse(minus(v519, v439)); real2 v389 = minusplus(v387, v388); real2 v391 = minusplus(uminus(v387), v388); real2 v399 = ctimesminusplus(reverse(v389), tbl[66 + tbloffset], ctimes(v389, tbl[67 + tbloffset])); real2 v764 = plus(v399, v479); real2 v760 = 
minus(v479, v399); real2 v804 = plus(v764, v765); real2 v800 = minus(v765, v764); store(out, 2 << outShift, plus(v804, v805)); real2 v818 = minus(v804, v805); store(out, 34 << outShift, ctimesminusplus(v818, tbl[0 + tbloffset], ctimes(reverse(v818), tbl[1 + tbloffset]))); real2 v803 = minusplus(uminus(v799), v800); store(out, 50 << outShift, ctimesminusplus(reverse(v803), tbl[144 + tbloffset], ctimes(v803, tbl[145 + tbloffset]))); real2 v801 = minusplus(v799, v800); store(out, 18 << outShift, ctimesminusplus(reverse(v801), tbl[142 + tbloffset], ctimes(v801, tbl[143 + tbloffset]))); real2 v763 = minusplus(uminus(v759), v760); real2 v761 = minusplus(v759, v760); real2 v777 = ctimesminusplus(reverse(v763), tbl[136 + tbloffset], ctimes(v763, tbl[137 + tbloffset])); store(out, 26 << outShift, plus(v777, v797)); real2 v830 = minus(v777, v797); store(out, 58 << outShift, ctimesminusplus(v830, tbl[0 + tbloffset], ctimes(reverse(v830), tbl[1 + tbloffset]))); real2 v771 = ctimesminusplus(reverse(v761), tbl[134 + tbloffset], ctimes(v761, tbl[135 + tbloffset])); store(out, 10 << outShift, plus(v771, v791)); real2 v824 = minus(v771, v791); store(out, 42 << outShift, ctimesminusplus(v824, tbl[0 + tbloffset], ctimes(reverse(v824), tbl[1 + tbloffset]))); real2 v445 = ctimesminusplus(reverse(v431), tbl[76 + tbloffset], ctimes(v431, tbl[77 + tbloffset])); real2 v525 = ctimesminusplus(reverse(v511), tbl[92 + tbloffset], ctimes(v511, tbl[93 + tbloffset])); real2 v837 = reverse(minus(v525, v445)); real2 v843 = plus(v445, v525); real2 v485 = ctimesminusplus(reverse(v471), tbl[84 + tbloffset], ctimes(v471, tbl[85 + tbloffset])); real2 v405 = ctimesminusplus(reverse(v391), tbl[68 + tbloffset], ctimes(v391, tbl[69 + tbloffset])); real2 v838 = minus(v485, v405); real2 v842 = plus(v405, v485); real2 v878 = minus(v843, v842); real2 v882 = plus(v842, v843); store(out, 6 << outShift, plus(v882, v883)); real2 v896 = minus(v882, v883); store(out, 38 << outShift, ctimesminusplus(v896, tbl[0 + 
tbloffset], ctimes(reverse(v896), tbl[1 + tbloffset]))); real2 v881 = minusplus(uminus(v877), v878); store(out, 54 << outShift, ctimesminusplus(reverse(v881), tbl[156 + tbloffset], ctimes(v881, tbl[157 + tbloffset]))); real2 v879 = minusplus(v877, v878); store(out, 22 << outShift, ctimesminusplus(reverse(v879), tbl[154 + tbloffset], ctimes(v879, tbl[155 + tbloffset]))); real2 v841 = minusplus(uminus(v837), v838); real2 v839 = minusplus(v837, v838); real2 v855 = ctimesminusplus(reverse(v841), tbl[148 + tbloffset], ctimes(v841, tbl[149 + tbloffset])); store(out, 30 << outShift, plus(v855, v875)); real2 v908 = minus(v855, v875); store(out, 62 << outShift, ctimesminusplus(v908, tbl[0 + tbloffset], ctimes(reverse(v908), tbl[1 + tbloffset]))); real2 v849 = ctimesminusplus(reverse(v839), tbl[146 + tbloffset], ctimes(v839, tbl[147 + tbloffset])); store(out, 14 << outShift, plus(v849, v869)); real2 v902 = minus(v849, v869); store(out, 46 << outShift, ctimesminusplus(v902, tbl[0 + tbloffset], ctimes(reverse(v902), tbl[1 + tbloffset]))); real2 v151 = minusplus(uminus(v147), v148); real2 v149 = minusplus(v147, v148); real2 v311 = minusplus(uminus(v307), v308); real2 v309 = minusplus(v307, v308); real2 v109 = minusplus(v107, v108); real2 v111 = minusplus(uminus(v107), v108); real2 v119 = ctimesminusplus(reverse(v109), tbl[10 + tbloffset], ctimes(v109, tbl[11 + tbloffset])); real2 v269 = minusplus(v267, v268); real2 v271 = minusplus(uminus(v267), v268); real2 v279 = ctimesminusplus(reverse(v269), tbl[42 + tbloffset], ctimes(v269, tbl[43 + tbloffset])); real2 v960 = plus(v119, v279); real2 v956 = minus(v279, v119); real2 v169 = minusplus(v167, v168); real2 v171 = minusplus(uminus(v167), v168); real2 v159 = ctimesminusplus(reverse(v149), tbl[18 + tbloffset], ctimes(v149, tbl[19 + tbloffset])); real2 v319 = ctimesminusplus(reverse(v309), tbl[50 + tbloffset], ctimes(v309, tbl[51 + tbloffset])); real2 v921 = plus(v159, v319); real2 v915 = reverse(minus(v319, v159)); real2 v351 = 
minusplus(uminus(v347), v348); real2 v349 = minusplus(v347, v348); real2 v359 = ctimesminusplus(reverse(v349), tbl[58 + tbloffset], ctimes(v349, tbl[59 + tbloffset])); real2 v191 = minusplus(uminus(v187), v188); real2 v189 = minusplus(v187, v188); real2 v199 = ctimesminusplus(reverse(v189), tbl[26 + tbloffset], ctimes(v189, tbl[27 + tbloffset])); real2 v961 = plus(v199, v359); real2 v955 = reverse(minus(v359, v199)); real2 v995 = reverse(minus(v961, v960)); real2 v1001 = plus(v960, v961); real2 v179 = ctimesminusplus(reverse(v169), tbl[22 + tbloffset], ctimes(v169, tbl[23 + tbloffset])); real2 v941 = plus(v179, v339); real2 v935 = reverse(minus(v339, v179)); real2 v1016 = minus(v941, v940); real2 v1020 = plus(v940, v941); real2 v71 = minusplus(uminus(v67), v68); real2 v69 = minusplus(v67, v68); real2 v79 = ctimesminusplus(reverse(v69), tbl[2 + tbloffset], ctimes(v69, tbl[3 + tbloffset])); real2 v1041 = plus(v1020, v1021); real2 v1035 = reverse(minus(v1021, v1020)); real2 v229 = minusplus(v227, v228); real2 v231 = minusplus(uminus(v227), v228); real2 v239 = ctimesminusplus(reverse(v229), tbl[34 + tbloffset], ctimes(v229, tbl[35 + tbloffset])); real2 v920 = plus(v79, v239); real2 v916 = minus(v239, v79); real2 v996 = minus(v921, v920); real2 v1000 = plus(v920, v921); real2 v1040 = plus(v1000, v1001); real2 v1036 = minus(v1001, v1000); store(out, 1 << outShift, plus(v1040, v1041)); real2 v1054 = minus(v1040, v1041); store(out, 33 << outShift, ctimesminusplus(v1054, tbl[0 + tbloffset], ctimes(reverse(v1054), tbl[1 + tbloffset]))); real2 v1037 = minusplus(v1035, v1036); real2 v1039 = minusplus(uminus(v1035), v1036); store(out, 49 << outShift, ctimesminusplus(reverse(v1039), tbl[184 + tbloffset], ctimes(v1039, tbl[185 + tbloffset]))); store(out, 17 << outShift, ctimesminusplus(reverse(v1037), tbl[182 + tbloffset], ctimes(v1037, tbl[183 + tbloffset]))); real2 v1017 = minusplus(v1015, v1016); real2 v1019 = minusplus(uminus(v1015), v1016); real2 v1033 = 
ctimesminusplus(reverse(v1019), tbl[180 + tbloffset], ctimes(v1019, tbl[181 + tbloffset])); real2 v997 = minusplus(v995, v996); real2 v999 = minusplus(uminus(v995), v996); real2 v1013 = ctimesminusplus(reverse(v999), tbl[176 + tbloffset], ctimes(v999, tbl[177 + tbloffset])); store(out, 25 << outShift, plus(v1013, v1033)); real2 v1066 = minus(v1013, v1033); store(out, 57 << outShift, ctimesminusplus(v1066, tbl[0 + tbloffset], ctimes(reverse(v1066), tbl[1 + tbloffset]))); real2 v1027 = ctimesminusplus(reverse(v1017), tbl[178 + tbloffset], ctimes(v1017, tbl[179 + tbloffset])); real2 v1007 = ctimesminusplus(reverse(v997), tbl[174 + tbloffset], ctimes(v997, tbl[175 + tbloffset])); store(out, 9 << outShift, plus(v1007, v1027)); real2 v1060 = minus(v1007, v1027); store(out, 41 << outShift, ctimesminusplus(v1060, tbl[0 + tbloffset], ctimes(reverse(v1060), tbl[1 + tbloffset]))); real2 v937 = minusplus(v935, v936); real2 v939 = minusplus(uminus(v935), v936); real2 v959 = minusplus(uminus(v955), v956); real2 v957 = minusplus(v955, v956); real2 v967 = ctimesminusplus(reverse(v957), tbl[166 + tbloffset], ctimes(v957, tbl[167 + tbloffset])); real2 v947 = ctimesminusplus(reverse(v937), tbl[162 + tbloffset], ctimes(v937, tbl[163 + tbloffset])); real2 v919 = minusplus(uminus(v915), v916); real2 v917 = minusplus(v915, v916); real2 v1079 = plus(v947, v987); real2 v1073 = reverse(minus(v987, v947)); real2 v927 = ctimesminusplus(reverse(v917), tbl[158 + tbloffset], ctimes(v917, tbl[159 + tbloffset])); real2 v1074 = minus(v967, v927); real2 v1078 = plus(v927, v967); store(out, 5 << outShift, plus(v1078, v1079)); real2 v1092 = minus(v1078, v1079); store(out, 37 << outShift, ctimesminusplus(v1092, tbl[0 + tbloffset], ctimes(reverse(v1092), tbl[1 + tbloffset]))); real2 v1075 = minusplus(v1073, v1074); store(out, 21 << outShift, ctimesminusplus(reverse(v1075), tbl[186 + tbloffset], ctimes(v1075, tbl[187 + tbloffset]))); real2 v1077 = minusplus(uminus(v1073), v1074); store(out, 53 << 
outShift, ctimesminusplus(reverse(v1077), tbl[188 + tbloffset], ctimes(v1077, tbl[189 + tbloffset]))); real2 v953 = ctimesminusplus(reverse(v939), tbl[164 + tbloffset], ctimes(v939, tbl[165 + tbloffset])); real2 v1099 = reverse(minus(v993, v953)); real2 v1105 = plus(v953, v993); real2 v973 = ctimesminusplus(reverse(v959), tbl[168 + tbloffset], ctimes(v959, tbl[169 + tbloffset])); real2 v933 = ctimesminusplus(reverse(v919), tbl[160 + tbloffset], ctimes(v919, tbl[161 + tbloffset])); real2 v1104 = plus(v933, v973); real2 v1100 = minus(v973, v933); store(out, 13 << outShift, plus(v1104, v1105)); real2 v1118 = minus(v1104, v1105); store(out, 45 << outShift, ctimesminusplus(v1118, tbl[0 + tbloffset], ctimes(reverse(v1118), tbl[1 + tbloffset]))); real2 v1101 = minusplus(v1099, v1100); store(out, 29 << outShift, ctimesminusplus(reverse(v1101), tbl[190 + tbloffset], ctimes(v1101, tbl[191 + tbloffset]))); real2 v1103 = minusplus(uminus(v1099), v1100); store(out, 61 << outShift, ctimesminusplus(reverse(v1103), tbl[192 + tbloffset], ctimes(v1103, tbl[193 + tbloffset]))); real2 v345 = ctimesminusplus(reverse(v331), tbl[56 + tbloffset], ctimes(v331, tbl[57 + tbloffset])); real2 v325 = ctimesminusplus(reverse(v311), tbl[52 + tbloffset], ctimes(v311, tbl[53 + tbloffset])); real2 v265 = ctimesminusplus(reverse(v251), tbl[40 + tbloffset], ctimes(v251, tbl[41 + tbloffset])); real2 v185 = ctimesminusplus(reverse(v171), tbl[24 + tbloffset], ctimes(v171, tbl[25 + tbloffset])); real2 v165 = ctimesminusplus(reverse(v151), tbl[20 + tbloffset], ctimes(v151, tbl[21 + tbloffset])); real2 v1131 = plus(v165, v325); real2 v1125 = reverse(minus(v325, v165)); real2 v1151 = plus(v185, v345); real2 v1145 = reverse(minus(v345, v185)); real2 v105 = ctimesminusplus(reverse(v91), tbl[8 + tbloffset], ctimes(v91, tbl[9 + tbloffset])); real2 v1150 = plus(v105, v265); real2 v1146 = minus(v265, v105); real2 v1226 = minus(v1151, v1150); real2 v1230 = plus(v1150, v1151); real2 v1231 = plus(v1190, v1191); real2 
v1225 = reverse(minus(v1191, v1190)); real2 v1245 = reverse(minus(v1231, v1230)); real2 v1251 = plus(v1230, v1231); real2 v365 = ctimesminusplus(reverse(v351), tbl[60 + tbloffset], ctimes(v351, tbl[61 + tbloffset])); real2 v285 = ctimesminusplus(reverse(v271), tbl[44 + tbloffset], ctimes(v271, tbl[45 + tbloffset])); real2 v205 = ctimesminusplus(reverse(v191), tbl[28 + tbloffset], ctimes(v191, tbl[29 + tbloffset])); real2 v1171 = plus(v205, v365); real2 v1165 = reverse(minus(v365, v205)); real2 v125 = ctimesminusplus(reverse(v111), tbl[12 + tbloffset], ctimes(v111, tbl[13 + tbloffset])); real2 v85 = ctimesminusplus(reverse(v71), tbl[4 + tbloffset], ctimes(v71, tbl[5 + tbloffset])); real2 v245 = ctimesminusplus(reverse(v231), tbl[36 + tbloffset], ctimes(v231, tbl[37 + tbloffset])); real2 v1126 = minus(v245, v85); real2 v1130 = plus(v85, v245); real2 v1210 = plus(v1130, v1131); real2 v1206 = minus(v1131, v1130); real2 v1166 = minus(v285, v125); real2 v1170 = plus(v125, v285); real2 v1211 = plus(v1170, v1171); real2 v1205 = reverse(minus(v1171, v1170)); real2 v1246 = minus(v1211, v1210); real2 v1250 = plus(v1210, v1211); store(out, 3 << outShift, plus(v1250, v1251)); real2 v1264 = minus(v1250, v1251); store(out, 35 << outShift, ctimesminusplus(v1264, tbl[0 + tbloffset], ctimes(reverse(v1264), tbl[1 + tbloffset]))); real2 v1247 = minusplus(v1245, v1246); real2 v1249 = minusplus(uminus(v1245), v1246); store(out, 19 << outShift, ctimesminusplus(reverse(v1247), tbl[218 + tbloffset], ctimes(v1247, tbl[219 + tbloffset]))); store(out, 51 << outShift, ctimesminusplus(reverse(v1249), tbl[220 + tbloffset], ctimes(v1249, tbl[221 + tbloffset]))); real2 v1229 = minusplus(uminus(v1225), v1226); real2 v1227 = minusplus(v1225, v1226); real2 v1207 = minusplus(v1205, v1206); real2 v1209 = minusplus(uminus(v1205), v1206); real2 v1237 = ctimesminusplus(reverse(v1227), tbl[214 + tbloffset], ctimes(v1227, tbl[215 + tbloffset])); real2 v1217 = ctimesminusplus(reverse(v1207), tbl[210 + 
tbloffset], ctimes(v1207, tbl[211 + tbloffset])); store(out, 11 << outShift, plus(v1217, v1237)); real2 v1270 = minus(v1217, v1237); store(out, 43 << outShift, ctimesminusplus(v1270, tbl[0 + tbloffset], ctimes(reverse(v1270), tbl[1 + tbloffset]))); real2 v1223 = ctimesminusplus(reverse(v1209), tbl[212 + tbloffset], ctimes(v1209, tbl[213 + tbloffset])); real2 v1243 = ctimesminusplus(reverse(v1229), tbl[216 + tbloffset], ctimes(v1229, tbl[217 + tbloffset])); store(out, 27 << outShift, plus(v1223, v1243)); real2 v1276 = minus(v1223, v1243); store(out, 59 << outShift, ctimesminusplus(v1276, tbl[0 + tbloffset], ctimes(reverse(v1276), tbl[1 + tbloffset]))); real2 v1189 = minusplus(uminus(v1185), v1186); real2 v1187 = minusplus(v1185, v1186); real2 v1129 = minusplus(uminus(v1125), v1126); real2 v1127 = minusplus(v1125, v1126); real2 v1147 = minusplus(v1145, v1146); real2 v1149 = minusplus(uminus(v1145), v1146); real2 v1167 = minusplus(v1165, v1166); real2 v1169 = minusplus(uminus(v1165), v1166); real2 v1143 = ctimesminusplus(reverse(v1129), tbl[196 + tbloffset], ctimes(v1129, tbl[197 + tbloffset])); real2 v1163 = ctimesminusplus(reverse(v1149), tbl[200 + tbloffset], ctimes(v1149, tbl[201 + tbloffset])); real2 v1203 = ctimesminusplus(reverse(v1189), tbl[208 + tbloffset], ctimes(v1189, tbl[209 + tbloffset])); real2 v1315 = plus(v1163, v1203); real2 v1309 = reverse(minus(v1203, v1163)); real2 v1183 = ctimesminusplus(reverse(v1169), tbl[204 + tbloffset], ctimes(v1169, tbl[205 + tbloffset])); real2 v1314 = plus(v1143, v1183); real2 v1310 = minus(v1183, v1143); store(out, 15 << outShift, plus(v1314, v1315)); real2 v1328 = minus(v1314, v1315); store(out, 47 << outShift, ctimesminusplus(v1328, tbl[0 + tbloffset], ctimes(reverse(v1328), tbl[1 + tbloffset]))); real2 v1311 = minusplus(v1309, v1310); store(out, 31 << outShift, ctimesminusplus(reverse(v1311), tbl[226 + tbloffset], ctimes(v1311, tbl[227 + tbloffset]))); real2 v1313 = minusplus(uminus(v1309), v1310); store(out, 63 << 
outShift, ctimesminusplus(reverse(v1313), tbl[228 + tbloffset], ctimes(v1313, tbl[229 + tbloffset]))); real2 v1177 = ctimesminusplus(reverse(v1167), tbl[202 + tbloffset], ctimes(v1167, tbl[203 + tbloffset])); real2 v1137 = ctimesminusplus(reverse(v1127), tbl[194 + tbloffset], ctimes(v1127, tbl[195 + tbloffset])); real2 v1197 = ctimesminusplus(reverse(v1187), tbl[206 + tbloffset], ctimes(v1187, tbl[207 + tbloffset])); real2 v1157 = ctimesminusplus(reverse(v1147), tbl[198 + tbloffset], ctimes(v1147, tbl[199 + tbloffset])); real2 v1283 = reverse(minus(v1197, v1157)); real2 v1289 = plus(v1157, v1197); real2 v1288 = plus(v1137, v1177); real2 v1284 = minus(v1177, v1137); store(out, 7 << outShift, plus(v1288, v1289)); real2 v1302 = minus(v1288, v1289); store(out, 39 << outShift, ctimesminusplus(v1302, tbl[0 + tbloffset], ctimes(reverse(v1302), tbl[1 + tbloffset]))); real2 v1285 = minusplus(v1283, v1284); real2 v1287 = minusplus(uminus(v1283), v1284); store(out, 55 << outShift, ctimesminusplus(reverse(v1287), tbl[224 + tbloffset], ctimes(v1287, tbl[225 + tbloffset]))); store(out, 23 << outShift, ctimesminusplus(reverse(v1285), tbl[222 + tbloffset], ctimes(v1285, tbl[223 + tbloffset]))); // Pres : 17339 } } ALIGNED(8192) void tbut64f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; // Pres : 30254 real2 v37 = load(in, 35 << inShift); real2 v5 = load(in, 3 << inShift); real2 v132 = plus(v5, v37); real2 v128 = minus(v37, v5); real2 v21 = load(in, 19 << inShift); real2 v53 = load(in, 51 << inShift); real2 v133 = plus(v21, v53); real2 v127 = reverse(minus(v21, v53)); real2 v131 = minusplus(uminus(v127), v128); real2 v129 = minusplus(v127, v128); real2 v139 = timesminusplus(reverse(v129), 
load(tbl, 14 * VECWIDTH + tbloffset), times(v129, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v145 = timesminusplus(reverse(v131), load(tbl, 16 * VECWIDTH + tbloffset), times(v131, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v448 = minus(v133, v132); real2 v452 = plus(v132, v133); real2 v45 = load(in, 43 << inShift); real2 v13 = load(in, 11 << inShift); real2 v292 = plus(v13, v45); real2 v288 = minus(v45, v13); real2 v29 = load(in, 27 << inShift); real2 v61 = load(in, 59 << inShift); real2 v293 = plus(v29, v61); real2 v287 = reverse(minus(v29, v61)); real2 v291 = minusplus(uminus(v287), v288); real2 v289 = minusplus(v287, v288); real2 v299 = timesminusplus(reverse(v289), load(tbl, 46 * VECWIDTH + tbloffset), times(v289, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v453 = plus(v292, v293); real2 v447 = reverse(minus(v292, v293)); real2 v608 = minus(v453, v452); real2 v612 = plus(v452, v453); real2 v980 = plus(v139, v299); real2 v976 = minus(v299, v139); real2 v449 = minusplus(v447, v448); real2 v451 = minusplus(uminus(v447), v448); real2 v465 = timesminusplus(reverse(v451), load(tbl, 80 * VECWIDTH + tbloffset), times(v451, load(tbl, 81 * VECWIDTH + tbloffset))); real2 v305 = timesminusplus(reverse(v291), load(tbl, 48 * VECWIDTH + tbloffset), times(v291, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v1186 = minus(v305, v145); real2 v1190 = plus(v145, v305); real2 v459 = timesminusplus(reverse(v449), load(tbl, 78 * VECWIDTH + tbloffset), times(v449, load(tbl, 79 * VECWIDTH + tbloffset))); real2 v25 = load(in, 23 << inShift); real2 v57 = load(in, 55 << inShift); real2 v207 = reverse(minus(v25, v57)); real2 v213 = plus(v25, v57); real2 v9 = load(in, 7 << inShift); real2 v41 = load(in, 39 << inShift); real2 v212 = plus(v9, v41); real2 v208 = minus(v41, v9); real2 v528 = minus(v213, v212); real2 v532 = plus(v212, v213); real2 v209 = minusplus(v207, v208); real2 v211 = minusplus(uminus(v207), v208); real2 v225 = timesminusplus(reverse(v211), load(tbl, 32 * VECWIDTH + 
tbloffset), times(v211, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v219 = timesminusplus(reverse(v209), load(tbl, 30 * VECWIDTH + tbloffset), times(v209, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v17 = load(in, 15 << inShift); real2 v49 = load(in, 47 << inShift); real2 v368 = minus(v49, v17); real2 v372 = plus(v17, v49); real2 v33 = load(in, 31 << inShift); real2 v65 = load(in, 63 << inShift); real2 v367 = reverse(minus(v33, v65)); real2 v373 = plus(v33, v65); real2 v369 = minusplus(v367, v368); real2 v371 = minusplus(uminus(v367), v368); real2 v533 = plus(v372, v373); real2 v527 = reverse(minus(v372, v373)); real2 v607 = reverse(minus(v532, v533)); real2 v613 = plus(v532, v533); real2 v529 = minusplus(v527, v528); real2 v531 = minusplus(uminus(v527), v528); real2 v545 = timesminusplus(reverse(v531), load(tbl, 96 * VECWIDTH + tbloffset), times(v531, load(tbl, 97 * VECWIDTH + tbloffset))); real2 v653 = plus(v612, v613); real2 v647 = reverse(minus(v612, v613)); real2 v609 = minusplus(v607, v608); real2 v611 = minusplus(uminus(v607), v608); real2 v863 = plus(v465, v545); real2 v857 = reverse(minus(v465, v545)); real2 v539 = timesminusplus(reverse(v529), load(tbl, 94 * VECWIDTH + tbloffset), times(v529, load(tbl, 95 * VECWIDTH + tbloffset))); real2 v385 = timesminusplus(reverse(v371), load(tbl, 64 * VECWIDTH + tbloffset), times(v371, load(tbl, 65 * VECWIDTH + tbloffset))); real2 v619 = timesminusplus(reverse(v609), load(tbl, 110 * VECWIDTH + tbloffset), times(v609, load(tbl, 111 * VECWIDTH + tbloffset))); real2 v1191 = plus(v225, v385); real2 v1185 = reverse(minus(v225, v385)); real2 v779 = reverse(minus(v459, v539)); real2 v785 = plus(v459, v539); real2 v625 = timesminusplus(reverse(v611), load(tbl, 112 * VECWIDTH + tbloffset), times(v611, load(tbl, 113 * VECWIDTH + tbloffset))); real2 v379 = timesminusplus(reverse(v369), load(tbl, 62 * VECWIDTH + tbloffset), times(v369, load(tbl, 63 * VECWIDTH + tbloffset))); real2 v975 = reverse(minus(v219, v379)); real2 v981 
= plus(v219, v379); real2 v977 = minusplus(v975, v976); real2 v979 = minusplus(uminus(v975), v976); real2 v987 = timesminusplus(reverse(v977), load(tbl, 170 * VECWIDTH + tbloffset), times(v977, load(tbl, 171 * VECWIDTH + tbloffset))); real2 v993 = timesminusplus(reverse(v979), load(tbl, 172 * VECWIDTH + tbloffset), times(v979, load(tbl, 173 * VECWIDTH + tbloffset))); real2 v1015 = reverse(minus(v980, v981)); real2 v1021 = plus(v980, v981); real2 v11 = load(in, 9 << inShift); real2 v43 = load(in, 41 << inShift); real2 v248 = minus(v43, v11); real2 v252 = plus(v11, v43); real2 v59 = load(in, 57 << inShift); real2 v27 = load(in, 25 << inShift); real2 v253 = plus(v27, v59); real2 v247 = reverse(minus(v27, v59)); real2 v413 = plus(v252, v253); real2 v407 = reverse(minus(v252, v253)); real2 v249 = minusplus(v247, v248); real2 v251 = minusplus(uminus(v247), v248); real2 v259 = timesminusplus(reverse(v249), load(tbl, 38 * VECWIDTH + tbloffset), times(v249, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v35 = load(in, 33 << inShift); real2 v3 = load(in, 1 << inShift); real2 v92 = plus(v3, v35); real2 v88 = minus(v35, v3); real2 v51 = load(in, 49 << inShift); real2 v19 = load(in, 17 << inShift); real2 v87 = reverse(minus(v19, v51)); real2 v93 = plus(v19, v51); real2 v412 = plus(v92, v93); real2 v408 = minus(v93, v92); real2 v411 = minusplus(uminus(v407), v408); real2 v409 = minusplus(v407, v408); real2 v91 = minusplus(uminus(v87), v88); real2 v89 = minusplus(v87, v88); real2 v99 = timesminusplus(reverse(v89), load(tbl, 6 * VECWIDTH + tbloffset), times(v89, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v425 = timesminusplus(reverse(v411), load(tbl, 72 * VECWIDTH + tbloffset), times(v411, load(tbl, 73 * VECWIDTH + tbloffset))); real2 v568 = minus(v413, v412); real2 v572 = plus(v412, v413); real2 v940 = plus(v99, v259); real2 v936 = minus(v259, v99); real2 v419 = timesminusplus(reverse(v409), load(tbl, 70 * VECWIDTH + tbloffset), times(v409, load(tbl, 71 * VECWIDTH + 
tbloffset))); real2 v47 = load(in, 45 << inShift); real2 v15 = load(in, 13 << inShift); real2 v332 = plus(v15, v47); real2 v328 = minus(v47, v15); real2 v63 = load(in, 61 << inShift); real2 v31 = load(in, 29 << inShift); real2 v327 = reverse(minus(v31, v63)); real2 v333 = plus(v31, v63); real2 v329 = minusplus(v327, v328); real2 v331 = minusplus(uminus(v327), v328); real2 v339 = timesminusplus(reverse(v329), load(tbl, 54 * VECWIDTH + tbloffset), times(v329, load(tbl, 55 * VECWIDTH + tbloffset))); real2 v487 = reverse(minus(v332, v333)); real2 v493 = plus(v332, v333); real2 v7 = load(in, 5 << inShift); real2 v39 = load(in, 37 << inShift); real2 v172 = plus(v7, v39); real2 v168 = minus(v39, v7); real2 v55 = load(in, 53 << inShift); real2 v23 = load(in, 21 << inShift); real2 v173 = plus(v23, v55); real2 v167 = reverse(minus(v23, v55)); real2 v488 = minus(v173, v172); real2 v492 = plus(v172, v173); real2 v491 = minusplus(uminus(v487), v488); real2 v489 = minusplus(v487, v488); real2 v499 = timesminusplus(reverse(v489), load(tbl, 86 * VECWIDTH + tbloffset), times(v489, load(tbl, 87 * VECWIDTH + tbloffset))); real2 v505 = timesminusplus(reverse(v491), load(tbl, 88 * VECWIDTH + tbloffset), times(v491, load(tbl, 89 * VECWIDTH + tbloffset))); real2 v567 = reverse(minus(v492, v493)); real2 v573 = plus(v492, v493); real2 v571 = minusplus(uminus(v567), v568); real2 v569 = minusplus(v567, v568); real2 v579 = timesminusplus(reverse(v569), load(tbl, 102 * VECWIDTH + tbloffset), times(v569, load(tbl, 103 * VECWIDTH + tbloffset))); real2 v585 = timesminusplus(reverse(v571), load(tbl, 104 * VECWIDTH + tbloffset), times(v571, load(tbl, 105 * VECWIDTH + tbloffset))); real2 v739 = plus(v585, v625); real2 v733 = reverse(minus(v585, v625)); real2 v707 = reverse(minus(v579, v619)); real2 v713 = plus(v579, v619); real2 v648 = minus(v573, v572); real2 v652 = plus(v572, v573); real2 v673 = plus(v652, v653); real2 v667 = reverse(minus(v652, v653)); real2 v651 = minusplus(uminus(v647), v648); 
real2 v649 = minusplus(v647, v648); real2 v659 = timesminusplus(reverse(v649), load(tbl, 118 * VECWIDTH + tbloffset), times(v649, load(tbl, 119 * VECWIDTH + tbloffset))); real2 v665 = timesminusplus(reverse(v651), load(tbl, 120 * VECWIDTH + tbloffset), times(v651, load(tbl, 121 * VECWIDTH + tbloffset))); real2 v780 = minus(v499, v419); real2 v784 = plus(v419, v499); real2 v781 = minusplus(v779, v780); real2 v783 = minusplus(uminus(v779), v780); real2 v805 = plus(v784, v785); real2 v799 = reverse(minus(v784, v785)); real2 v862 = plus(v425, v505); real2 v858 = minus(v505, v425); real2 v859 = minusplus(v857, v858); real2 v861 = minusplus(uminus(v857), v858); real2 v875 = timesminusplus(reverse(v861), load(tbl, 152 * VECWIDTH + tbloffset), times(v861, load(tbl, 153 * VECWIDTH + tbloffset))); real2 v791 = timesminusplus(reverse(v781), load(tbl, 138 * VECWIDTH + tbloffset), times(v781, load(tbl, 139 * VECWIDTH + tbloffset))); real2 v797 = timesminusplus(reverse(v783), load(tbl, 140 * VECWIDTH + tbloffset), times(v783, load(tbl, 141 * VECWIDTH + tbloffset))); real2 v883 = plus(v862, v863); real2 v877 = reverse(minus(v862, v863)); real2 v869 = timesminusplus(reverse(v859), load(tbl, 150 * VECWIDTH + tbloffset), times(v859, load(tbl, 151 * VECWIDTH + tbloffset))); real2 v36 = load(in, 34 << inShift); real2 v4 = load(in, 2 << inShift); real2 v108 = minus(v36, v4); real2 v112 = plus(v4, v36); real2 v52 = load(in, 50 << inShift); real2 v20 = load(in, 18 << inShift); real2 v113 = plus(v20, v52); real2 v107 = reverse(minus(v20, v52)); real2 v428 = minus(v113, v112); real2 v432 = plus(v112, v113); real2 v12 = load(in, 10 << inShift); real2 v44 = load(in, 42 << inShift); real2 v268 = minus(v44, v12); real2 v272 = plus(v12, v44); real2 v28 = load(in, 26 << inShift); real2 v60 = load(in, 58 << inShift); real2 v267 = reverse(minus(v28, v60)); real2 v273 = plus(v28, v60); real2 v427 = reverse(minus(v272, v273)); real2 v433 = plus(v272, v273); real2 v431 = minusplus(uminus(v427), 
v428); real2 v429 = minusplus(v427, v428); real2 v439 = timesminusplus(reverse(v429), load(tbl, 74 * VECWIDTH + tbloffset), times(v429, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v588 = minus(v433, v432); real2 v592 = plus(v432, v433); real2 v40 = load(in, 38 << inShift); real2 v8 = load(in, 6 << inShift); real2 v188 = minus(v40, v8); real2 v192 = plus(v8, v40); real2 v24 = load(in, 22 << inShift); real2 v56 = load(in, 54 << inShift); real2 v187 = reverse(minus(v24, v56)); real2 v193 = plus(v24, v56); real2 v512 = plus(v192, v193); real2 v508 = minus(v193, v192); real2 v32 = load(in, 30 << inShift); real2 v64 = load(in, 62 << inShift); real2 v347 = reverse(minus(v32, v64)); real2 v353 = plus(v32, v64); real2 v48 = load(in, 46 << inShift); real2 v16 = load(in, 14 << inShift); real2 v348 = minus(v48, v16); real2 v352 = plus(v16, v48); real2 v513 = plus(v352, v353); real2 v507 = reverse(minus(v352, v353)); real2 v587 = reverse(minus(v512, v513)); real2 v593 = plus(v512, v513); real2 v633 = plus(v592, v593); real2 v627 = reverse(minus(v592, v593)); real2 v591 = minusplus(uminus(v587), v588); real2 v589 = minusplus(v587, v588); real2 v605 = timesminusplus(reverse(v591), load(tbl, 108 * VECWIDTH + tbloffset), times(v591, load(tbl, 109 * VECWIDTH + tbloffset))); real2 v599 = timesminusplus(reverse(v589), load(tbl, 106 * VECWIDTH + tbloffset), times(v589, load(tbl, 107 * VECWIDTH + tbloffset))); real2 v46 = load(in, 44 << inShift); real2 v14 = load(in, 12 << inShift); real2 v312 = plus(v14, v46); real2 v308 = minus(v46, v14); real2 v62 = load(in, 60 << inShift); real2 v30 = load(in, 28 << inShift); real2 v313 = plus(v30, v62); real2 v307 = reverse(minus(v30, v62)); real2 v467 = reverse(minus(v312, v313)); real2 v473 = plus(v312, v313); real2 v22 = load(in, 20 << inShift); real2 v54 = load(in, 52 << inShift); real2 v147 = reverse(minus(v22, v54)); real2 v153 = plus(v22, v54); real2 v6 = load(in, 4 << inShift); real2 v38 = load(in, 36 << inShift); real2 v148 = minus(v38, 
v6); real2 v152 = plus(v6, v38); real2 v472 = plus(v152, v153); real2 v468 = minus(v153, v152); real2 v547 = reverse(minus(v472, v473)); real2 v553 = plus(v472, v473); real2 v10 = load(in, 8 << inShift); real2 v42 = load(in, 40 << inShift); real2 v232 = plus(v10, v42); real2 v228 = minus(v42, v10); real2 v58 = load(in, 56 << inShift); real2 v26 = load(in, 24 << inShift); real2 v233 = plus(v26, v58); real2 v227 = reverse(minus(v26, v58)); real2 v393 = plus(v232, v233); real2 v387 = reverse(minus(v232, v233)); real2 v2 = load(in, 0 << inShift); real2 v34 = load(in, 32 << inShift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v18 = load(in, 16 << inShift); real2 v50 = load(in, 48 << inShift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v18, v50)); real2 v388 = minus(v73, v72); real2 v392 = plus(v72, v73); real2 v548 = minus(v393, v392); real2 v552 = plus(v392, v393); real2 v628 = minus(v553, v552); real2 v632 = plus(v552, v553); real2 v672 = plus(v632, v633); real2 v668 = minus(v633, v632); scatter(out, 0, 64, plus(v672, v673)); real2 v686 = minus(v672, v673); scatter(out, 32, 64, timesminusplus(v686, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v686), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v669 = minusplus(v667, v668); real2 v671 = minusplus(uminus(v667), v668); scatter(out, 48, 64, timesminusplus(reverse(v671), load(tbl, 124 * VECWIDTH + tbloffset), times(v671, load(tbl, 125 * VECWIDTH + tbloffset)))); scatter(out, 16, 64, timesminusplus(reverse(v669), load(tbl, 122 * VECWIDTH + tbloffset), times(v669, load(tbl, 123 * VECWIDTH + tbloffset)))); real2 v631 = minusplus(uminus(v627), v628); real2 v629 = minusplus(v627, v628); real2 v639 = timesminusplus(reverse(v629), load(tbl, 114 * VECWIDTH + tbloffset), times(v629, load(tbl, 115 * VECWIDTH + tbloffset))); scatter(out, 8, 64, plus(v639, v659)); real2 v694 = minus(v639, v659); scatter(out, 40, 64, timesminusplus(v694, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v694), 
load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v645 = timesminusplus(reverse(v631), load(tbl, 116 * VECWIDTH + tbloffset), times(v631, load(tbl, 117 * VECWIDTH + tbloffset))); scatter(out, 24, 64, plus(v645, v665)); real2 v700 = minus(v645, v665); scatter(out, 56, 64, timesminusplus(v700, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v700), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v549 = minusplus(v547, v548); real2 v551 = minusplus(uminus(v547), v548); real2 v559 = timesminusplus(reverse(v549), load(tbl, 98 * VECWIDTH + tbloffset), times(v549, load(tbl, 99 * VECWIDTH + tbloffset))); real2 v708 = minus(v599, v559); real2 v712 = plus(v559, v599); scatter(out, 4, 64, plus(v712, v713)); real2 v726 = minus(v712, v713); scatter(out, 36, 64, timesminusplus(v726, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v726), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v711 = minusplus(uminus(v707), v708); real2 v709 = minusplus(v707, v708); scatter(out, 20, 64, timesminusplus(reverse(v709), load(tbl, 126 * VECWIDTH + tbloffset), times(v709, load(tbl, 127 * VECWIDTH + tbloffset)))); scatter(out, 52, 64, timesminusplus(reverse(v711), load(tbl, 128 * VECWIDTH + tbloffset), times(v711, load(tbl, 129 * VECWIDTH + tbloffset)))); real2 v565 = timesminusplus(reverse(v551), load(tbl, 100 * VECWIDTH + tbloffset), times(v551, load(tbl, 101 * VECWIDTH + tbloffset))); real2 v738 = plus(v565, v605); real2 v734 = minus(v605, v565); scatter(out, 12, 64, plus(v738, v739)); real2 v752 = minus(v738, v739); scatter(out, 44, 64, timesminusplus(v752, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v752), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v737 = minusplus(uminus(v733), v734); scatter(out, 60, 64, timesminusplus(reverse(v737), load(tbl, 132 * VECWIDTH + tbloffset), times(v737, load(tbl, 133 * VECWIDTH + tbloffset)))); real2 v735 = minusplus(v733, v734); scatter(out, 28, 64, timesminusplus(reverse(v735), load(tbl, 130 * VECWIDTH + tbloffset), times(v735, load(tbl, 131 * 
VECWIDTH + tbloffset)))); real2 v471 = minusplus(uminus(v467), v468); real2 v469 = minusplus(v467, v468); real2 v479 = timesminusplus(reverse(v469), load(tbl, 82 * VECWIDTH + tbloffset), times(v469, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v511 = minusplus(uminus(v507), v508); real2 v509 = minusplus(v507, v508); real2 v519 = timesminusplus(reverse(v509), load(tbl, 90 * VECWIDTH + tbloffset), times(v509, load(tbl, 91 * VECWIDTH + tbloffset))); real2 v765 = plus(v439, v519); real2 v759 = reverse(minus(v439, v519)); real2 v389 = minusplus(v387, v388); real2 v391 = minusplus(uminus(v387), v388); real2 v399 = timesminusplus(reverse(v389), load(tbl, 66 * VECWIDTH + tbloffset), times(v389, load(tbl, 67 * VECWIDTH + tbloffset))); real2 v764 = plus(v399, v479); real2 v760 = minus(v479, v399); real2 v804 = plus(v764, v765); real2 v800 = minus(v765, v764); scatter(out, 2, 64, plus(v804, v805)); real2 v818 = minus(v804, v805); scatter(out, 34, 64, timesminusplus(v818, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v818), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v803 = minusplus(uminus(v799), v800); scatter(out, 50, 64, timesminusplus(reverse(v803), load(tbl, 144 * VECWIDTH + tbloffset), times(v803, load(tbl, 145 * VECWIDTH + tbloffset)))); real2 v801 = minusplus(v799, v800); scatter(out, 18, 64, timesminusplus(reverse(v801), load(tbl, 142 * VECWIDTH + tbloffset), times(v801, load(tbl, 143 * VECWIDTH + tbloffset)))); real2 v763 = minusplus(uminus(v759), v760); real2 v761 = minusplus(v759, v760); real2 v777 = timesminusplus(reverse(v763), load(tbl, 136 * VECWIDTH + tbloffset), times(v763, load(tbl, 137 * VECWIDTH + tbloffset))); scatter(out, 26, 64, plus(v777, v797)); real2 v830 = minus(v777, v797); scatter(out, 58, 64, timesminusplus(v830, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v830), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v771 = timesminusplus(reverse(v761), load(tbl, 134 * VECWIDTH + tbloffset), times(v761, load(tbl, 135 * VECWIDTH + 
tbloffset))); scatter(out, 10, 64, plus(v771, v791)); real2 v824 = minus(v771, v791); scatter(out, 42, 64, timesminusplus(v824, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v824), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v445 = timesminusplus(reverse(v431), load(tbl, 76 * VECWIDTH + tbloffset), times(v431, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v525 = timesminusplus(reverse(v511), load(tbl, 92 * VECWIDTH + tbloffset), times(v511, load(tbl, 93 * VECWIDTH + tbloffset))); real2 v837 = reverse(minus(v445, v525)); real2 v843 = plus(v445, v525); real2 v485 = timesminusplus(reverse(v471), load(tbl, 84 * VECWIDTH + tbloffset), times(v471, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v405 = timesminusplus(reverse(v391), load(tbl, 68 * VECWIDTH + tbloffset), times(v391, load(tbl, 69 * VECWIDTH + tbloffset))); real2 v838 = minus(v485, v405); real2 v842 = plus(v405, v485); real2 v878 = minus(v843, v842); real2 v882 = plus(v842, v843); scatter(out, 6, 64, plus(v882, v883)); real2 v896 = minus(v882, v883); scatter(out, 38, 64, timesminusplus(v896, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v896), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v881 = minusplus(uminus(v877), v878); scatter(out, 54, 64, timesminusplus(reverse(v881), load(tbl, 156 * VECWIDTH + tbloffset), times(v881, load(tbl, 157 * VECWIDTH + tbloffset)))); real2 v879 = minusplus(v877, v878); scatter(out, 22, 64, timesminusplus(reverse(v879), load(tbl, 154 * VECWIDTH + tbloffset), times(v879, load(tbl, 155 * VECWIDTH + tbloffset)))); real2 v841 = minusplus(uminus(v837), v838); real2 v839 = minusplus(v837, v838); real2 v855 = timesminusplus(reverse(v841), load(tbl, 148 * VECWIDTH + tbloffset), times(v841, load(tbl, 149 * VECWIDTH + tbloffset))); scatter(out, 30, 64, plus(v855, v875)); real2 v908 = minus(v855, v875); scatter(out, 62, 64, timesminusplus(v908, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v908), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v849 = 
timesminusplus(reverse(v839), load(tbl, 146 * VECWIDTH + tbloffset), times(v839, load(tbl, 147 * VECWIDTH + tbloffset))); scatter(out, 14, 64, plus(v849, v869)); real2 v902 = minus(v849, v869); scatter(out, 46, 64, timesminusplus(v902, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v902), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v151 = minusplus(uminus(v147), v148); real2 v149 = minusplus(v147, v148); real2 v311 = minusplus(uminus(v307), v308); real2 v309 = minusplus(v307, v308); real2 v109 = minusplus(v107, v108); real2 v111 = minusplus(uminus(v107), v108); real2 v119 = timesminusplus(reverse(v109), load(tbl, 10 * VECWIDTH + tbloffset), times(v109, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v269 = minusplus(v267, v268); real2 v271 = minusplus(uminus(v267), v268); real2 v279 = timesminusplus(reverse(v269), load(tbl, 42 * VECWIDTH + tbloffset), times(v269, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v960 = plus(v119, v279); real2 v956 = minus(v279, v119); real2 v169 = minusplus(v167, v168); real2 v171 = minusplus(uminus(v167), v168); real2 v159 = timesminusplus(reverse(v149), load(tbl, 18 * VECWIDTH + tbloffset), times(v149, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v319 = timesminusplus(reverse(v309), load(tbl, 50 * VECWIDTH + tbloffset), times(v309, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v921 = plus(v159, v319); real2 v915 = reverse(minus(v159, v319)); real2 v351 = minusplus(uminus(v347), v348); real2 v349 = minusplus(v347, v348); real2 v359 = timesminusplus(reverse(v349), load(tbl, 58 * VECWIDTH + tbloffset), times(v349, load(tbl, 59 * VECWIDTH + tbloffset))); real2 v191 = minusplus(uminus(v187), v188); real2 v189 = minusplus(v187, v188); real2 v199 = timesminusplus(reverse(v189), load(tbl, 26 * VECWIDTH + tbloffset), times(v189, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v961 = plus(v199, v359); real2 v955 = reverse(minus(v199, v359)); real2 v995 = reverse(minus(v960, v961)); real2 v1001 = plus(v960, v961); real2 v179 = 
timesminusplus(reverse(v169), load(tbl, 22 * VECWIDTH + tbloffset), times(v169, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v941 = plus(v179, v339); real2 v935 = reverse(minus(v179, v339)); real2 v1016 = minus(v941, v940); real2 v1020 = plus(v940, v941); real2 v71 = minusplus(uminus(v67), v68); real2 v69 = minusplus(v67, v68); real2 v79 = timesminusplus(reverse(v69), load(tbl, 2 * VECWIDTH + tbloffset), times(v69, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v1041 = plus(v1020, v1021); real2 v1035 = reverse(minus(v1020, v1021)); real2 v229 = minusplus(v227, v228); real2 v231 = minusplus(uminus(v227), v228); real2 v239 = timesminusplus(reverse(v229), load(tbl, 34 * VECWIDTH + tbloffset), times(v229, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v920 = plus(v79, v239); real2 v916 = minus(v239, v79); real2 v996 = minus(v921, v920); real2 v1000 = plus(v920, v921); real2 v1040 = plus(v1000, v1001); real2 v1036 = minus(v1001, v1000); scatter(out, 1, 64, plus(v1040, v1041)); real2 v1054 = minus(v1040, v1041); scatter(out, 33, 64, timesminusplus(v1054, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1054), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1037 = minusplus(v1035, v1036); real2 v1039 = minusplus(uminus(v1035), v1036); scatter(out, 49, 64, timesminusplus(reverse(v1039), load(tbl, 184 * VECWIDTH + tbloffset), times(v1039, load(tbl, 185 * VECWIDTH + tbloffset)))); scatter(out, 17, 64, timesminusplus(reverse(v1037), load(tbl, 182 * VECWIDTH + tbloffset), times(v1037, load(tbl, 183 * VECWIDTH + tbloffset)))); real2 v1017 = minusplus(v1015, v1016); real2 v1019 = minusplus(uminus(v1015), v1016); real2 v1033 = timesminusplus(reverse(v1019), load(tbl, 180 * VECWIDTH + tbloffset), times(v1019, load(tbl, 181 * VECWIDTH + tbloffset))); real2 v997 = minusplus(v995, v996); real2 v999 = minusplus(uminus(v995), v996); real2 v1013 = timesminusplus(reverse(v999), load(tbl, 176 * VECWIDTH + tbloffset), times(v999, load(tbl, 177 * VECWIDTH + tbloffset))); scatter(out, 25, 
64, plus(v1013, v1033)); real2 v1066 = minus(v1013, v1033); scatter(out, 57, 64, timesminusplus(v1066, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1066), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1027 = timesminusplus(reverse(v1017), load(tbl, 178 * VECWIDTH + tbloffset), times(v1017, load(tbl, 179 * VECWIDTH + tbloffset))); real2 v1007 = timesminusplus(reverse(v997), load(tbl, 174 * VECWIDTH + tbloffset), times(v997, load(tbl, 175 * VECWIDTH + tbloffset))); scatter(out, 9, 64, plus(v1007, v1027)); real2 v1060 = minus(v1007, v1027); scatter(out, 41, 64, timesminusplus(v1060, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1060), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v937 = minusplus(v935, v936); real2 v939 = minusplus(uminus(v935), v936); real2 v959 = minusplus(uminus(v955), v956); real2 v957 = minusplus(v955, v956); real2 v967 = timesminusplus(reverse(v957), load(tbl, 166 * VECWIDTH + tbloffset), times(v957, load(tbl, 167 * VECWIDTH + tbloffset))); real2 v947 = timesminusplus(reverse(v937), load(tbl, 162 * VECWIDTH + tbloffset), times(v937, load(tbl, 163 * VECWIDTH + tbloffset))); real2 v919 = minusplus(uminus(v915), v916); real2 v917 = minusplus(v915, v916); real2 v1079 = plus(v947, v987); real2 v1073 = reverse(minus(v947, v987)); real2 v927 = timesminusplus(reverse(v917), load(tbl, 158 * VECWIDTH + tbloffset), times(v917, load(tbl, 159 * VECWIDTH + tbloffset))); real2 v1074 = minus(v967, v927); real2 v1078 = plus(v927, v967); scatter(out, 5, 64, plus(v1078, v1079)); real2 v1092 = minus(v1078, v1079); scatter(out, 37, 64, timesminusplus(v1092, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1092), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1075 = minusplus(v1073, v1074); scatter(out, 21, 64, timesminusplus(reverse(v1075), load(tbl, 186 * VECWIDTH + tbloffset), times(v1075, load(tbl, 187 * VECWIDTH + tbloffset)))); real2 v1077 = minusplus(uminus(v1073), v1074); scatter(out, 53, 64, timesminusplus(reverse(v1077), load(tbl, 188 * 
VECWIDTH + tbloffset), times(v1077, load(tbl, 189 * VECWIDTH + tbloffset)))); real2 v953 = timesminusplus(reverse(v939), load(tbl, 164 * VECWIDTH + tbloffset), times(v939, load(tbl, 165 * VECWIDTH + tbloffset))); real2 v1099 = reverse(minus(v953, v993)); real2 v1105 = plus(v953, v993); real2 v973 = timesminusplus(reverse(v959), load(tbl, 168 * VECWIDTH + tbloffset), times(v959, load(tbl, 169 * VECWIDTH + tbloffset))); real2 v933 = timesminusplus(reverse(v919), load(tbl, 160 * VECWIDTH + tbloffset), times(v919, load(tbl, 161 * VECWIDTH + tbloffset))); real2 v1104 = plus(v933, v973); real2 v1100 = minus(v973, v933); scatter(out, 13, 64, plus(v1104, v1105)); real2 v1118 = minus(v1104, v1105); scatter(out, 45, 64, timesminusplus(v1118, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1118), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1101 = minusplus(v1099, v1100); scatter(out, 29, 64, timesminusplus(reverse(v1101), load(tbl, 190 * VECWIDTH + tbloffset), times(v1101, load(tbl, 191 * VECWIDTH + tbloffset)))); real2 v1103 = minusplus(uminus(v1099), v1100); scatter(out, 61, 64, timesminusplus(reverse(v1103), load(tbl, 192 * VECWIDTH + tbloffset), times(v1103, load(tbl, 193 * VECWIDTH + tbloffset)))); real2 v345 = timesminusplus(reverse(v331), load(tbl, 56 * VECWIDTH + tbloffset), times(v331, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v325 = timesminusplus(reverse(v311), load(tbl, 52 * VECWIDTH + tbloffset), times(v311, load(tbl, 53 * VECWIDTH + tbloffset))); real2 v265 = timesminusplus(reverse(v251), load(tbl, 40 * VECWIDTH + tbloffset), times(v251, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v185 = timesminusplus(reverse(v171), load(tbl, 24 * VECWIDTH + tbloffset), times(v171, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v165 = timesminusplus(reverse(v151), load(tbl, 20 * VECWIDTH + tbloffset), times(v151, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v1131 = plus(v165, v325); real2 v1125 = reverse(minus(v165, v325)); real2 v1151 = plus(v185, v345); real2 
v1145 = reverse(minus(v185, v345)); real2 v105 = timesminusplus(reverse(v91), load(tbl, 8 * VECWIDTH + tbloffset), times(v91, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v1150 = plus(v105, v265); real2 v1146 = minus(v265, v105); real2 v1226 = minus(v1151, v1150); real2 v1230 = plus(v1150, v1151); real2 v1231 = plus(v1190, v1191); real2 v1225 = reverse(minus(v1190, v1191)); real2 v1245 = reverse(minus(v1230, v1231)); real2 v1251 = plus(v1230, v1231); real2 v365 = timesminusplus(reverse(v351), load(tbl, 60 * VECWIDTH + tbloffset), times(v351, load(tbl, 61 * VECWIDTH + tbloffset))); real2 v285 = timesminusplus(reverse(v271), load(tbl, 44 * VECWIDTH + tbloffset), times(v271, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v205 = timesminusplus(reverse(v191), load(tbl, 28 * VECWIDTH + tbloffset), times(v191, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v1171 = plus(v205, v365); real2 v1165 = reverse(minus(v205, v365)); real2 v125 = timesminusplus(reverse(v111), load(tbl, 12 * VECWIDTH + tbloffset), times(v111, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v85 = timesminusplus(reverse(v71), load(tbl, 4 * VECWIDTH + tbloffset), times(v71, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v245 = timesminusplus(reverse(v231), load(tbl, 36 * VECWIDTH + tbloffset), times(v231, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v1126 = minus(v245, v85); real2 v1130 = plus(v85, v245); real2 v1210 = plus(v1130, v1131); real2 v1206 = minus(v1131, v1130); real2 v1166 = minus(v285, v125); real2 v1170 = plus(v125, v285); real2 v1211 = plus(v1170, v1171); real2 v1205 = reverse(minus(v1170, v1171)); real2 v1246 = minus(v1211, v1210); real2 v1250 = plus(v1210, v1211); scatter(out, 3, 64, plus(v1250, v1251)); real2 v1264 = minus(v1250, v1251); scatter(out, 35, 64, timesminusplus(v1264, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1264), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1247 = minusplus(v1245, v1246); real2 v1249 = minusplus(uminus(v1245), v1246); scatter(out, 19, 64, 
timesminusplus(reverse(v1247), load(tbl, 218 * VECWIDTH + tbloffset), times(v1247, load(tbl, 219 * VECWIDTH + tbloffset)))); scatter(out, 51, 64, timesminusplus(reverse(v1249), load(tbl, 220 * VECWIDTH + tbloffset), times(v1249, load(tbl, 221 * VECWIDTH + tbloffset)))); real2 v1229 = minusplus(uminus(v1225), v1226); real2 v1227 = minusplus(v1225, v1226); real2 v1207 = minusplus(v1205, v1206); real2 v1209 = minusplus(uminus(v1205), v1206); real2 v1237 = timesminusplus(reverse(v1227), load(tbl, 214 * VECWIDTH + tbloffset), times(v1227, load(tbl, 215 * VECWIDTH + tbloffset))); real2 v1217 = timesminusplus(reverse(v1207), load(tbl, 210 * VECWIDTH + tbloffset), times(v1207, load(tbl, 211 * VECWIDTH + tbloffset))); scatter(out, 11, 64, plus(v1217, v1237)); real2 v1270 = minus(v1217, v1237); scatter(out, 43, 64, timesminusplus(v1270, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1270), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1223 = timesminusplus(reverse(v1209), load(tbl, 212 * VECWIDTH + tbloffset), times(v1209, load(tbl, 213 * VECWIDTH + tbloffset))); real2 v1243 = timesminusplus(reverse(v1229), load(tbl, 216 * VECWIDTH + tbloffset), times(v1229, load(tbl, 217 * VECWIDTH + tbloffset))); scatter(out, 27, 64, plus(v1223, v1243)); real2 v1276 = minus(v1223, v1243); scatter(out, 59, 64, timesminusplus(v1276, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1276), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1189 = minusplus(uminus(v1185), v1186); real2 v1187 = minusplus(v1185, v1186); real2 v1129 = minusplus(uminus(v1125), v1126); real2 v1127 = minusplus(v1125, v1126); real2 v1147 = minusplus(v1145, v1146); real2 v1149 = minusplus(uminus(v1145), v1146); real2 v1167 = minusplus(v1165, v1166); real2 v1169 = minusplus(uminus(v1165), v1166); real2 v1143 = timesminusplus(reverse(v1129), load(tbl, 196 * VECWIDTH + tbloffset), times(v1129, load(tbl, 197 * VECWIDTH + tbloffset))); real2 v1163 = timesminusplus(reverse(v1149), load(tbl, 200 * VECWIDTH + tbloffset), 
times(v1149, load(tbl, 201 * VECWIDTH + tbloffset))); real2 v1203 = timesminusplus(reverse(v1189), load(tbl, 208 * VECWIDTH + tbloffset), times(v1189, load(tbl, 209 * VECWIDTH + tbloffset))); real2 v1315 = plus(v1163, v1203); real2 v1309 = reverse(minus(v1163, v1203)); real2 v1183 = timesminusplus(reverse(v1169), load(tbl, 204 * VECWIDTH + tbloffset), times(v1169, load(tbl, 205 * VECWIDTH + tbloffset))); real2 v1314 = plus(v1143, v1183); real2 v1310 = minus(v1183, v1143); scatter(out, 15, 64, plus(v1314, v1315)); real2 v1328 = minus(v1314, v1315); scatter(out, 47, 64, timesminusplus(v1328, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1328), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1311 = minusplus(v1309, v1310); scatter(out, 31, 64, timesminusplus(reverse(v1311), load(tbl, 226 * VECWIDTH + tbloffset), times(v1311, load(tbl, 227 * VECWIDTH + tbloffset)))); real2 v1313 = minusplus(uminus(v1309), v1310); scatter(out, 63, 64, timesminusplus(reverse(v1313), load(tbl, 228 * VECWIDTH + tbloffset), times(v1313, load(tbl, 229 * VECWIDTH + tbloffset)))); real2 v1177 = timesminusplus(reverse(v1167), load(tbl, 202 * VECWIDTH + tbloffset), times(v1167, load(tbl, 203 * VECWIDTH + tbloffset))); real2 v1137 = timesminusplus(reverse(v1127), load(tbl, 194 * VECWIDTH + tbloffset), times(v1127, load(tbl, 195 * VECWIDTH + tbloffset))); real2 v1197 = timesminusplus(reverse(v1187), load(tbl, 206 * VECWIDTH + tbloffset), times(v1187, load(tbl, 207 * VECWIDTH + tbloffset))); real2 v1157 = timesminusplus(reverse(v1147), load(tbl, 198 * VECWIDTH + tbloffset), times(v1147, load(tbl, 199 * VECWIDTH + tbloffset))); real2 v1283 = reverse(minus(v1157, v1197)); real2 v1289 = plus(v1157, v1197); real2 v1288 = plus(v1137, v1177); real2 v1284 = minus(v1177, v1137); scatter(out, 7, 64, plus(v1288, v1289)); real2 v1302 = minus(v1288, v1289); scatter(out, 39, 64, timesminusplus(v1302, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1302), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 
v1285 = minusplus(v1283, v1284); real2 v1287 = minusplus(uminus(v1283), v1284); scatter(out, 55, 64, timesminusplus(reverse(v1287), load(tbl, 224 * VECWIDTH + tbloffset), times(v1287, load(tbl, 225 * VECWIDTH + tbloffset)))); scatter(out, 23, 64, timesminusplus(reverse(v1285), load(tbl, 222 * VECWIDTH + tbloffset), times(v1285, load(tbl, 223 * VECWIDTH + tbloffset)))); // Pres : 17339 } } ALIGNED(8192) void tbut64b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; // Pres : 30254 real2 v37 = load(in, 35 << inShift); real2 v5 = load(in, 3 << inShift); real2 v132 = plus(v5, v37); real2 v128 = minus(v37, v5); real2 v21 = load(in, 19 << inShift); real2 v53 = load(in, 51 << inShift); real2 v133 = plus(v21, v53); real2 v127 = reverse(minus(v53, v21)); real2 v131 = minusplus(uminus(v127), v128); real2 v129 = minusplus(v127, v128); real2 v139 = timesminusplus(reverse(v129), load(tbl, 14 * VECWIDTH + tbloffset), times(v129, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v145 = timesminusplus(reverse(v131), load(tbl, 16 * VECWIDTH + tbloffset), times(v131, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v448 = minus(v133, v132); real2 v452 = plus(v132, v133); real2 v45 = load(in, 43 << inShift); real2 v13 = load(in, 11 << inShift); real2 v292 = plus(v13, v45); real2 v288 = minus(v45, v13); real2 v29 = load(in, 27 << inShift); real2 v61 = load(in, 59 << inShift); real2 v293 = plus(v29, v61); real2 v287 = reverse(minus(v61, v29)); real2 v291 = minusplus(uminus(v287), v288); real2 v289 = minusplus(v287, v288); real2 v299 = timesminusplus(reverse(v289), load(tbl, 46 * VECWIDTH + tbloffset), times(v289, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v453 = plus(v292, v293); real2 v447 = 
reverse(minus(v293, v292)); real2 v608 = minus(v453, v452); real2 v612 = plus(v452, v453); real2 v980 = plus(v139, v299); real2 v976 = minus(v299, v139); real2 v449 = minusplus(v447, v448); real2 v451 = minusplus(uminus(v447), v448); real2 v465 = timesminusplus(reverse(v451), load(tbl, 80 * VECWIDTH + tbloffset), times(v451, load(tbl, 81 * VECWIDTH + tbloffset))); real2 v305 = timesminusplus(reverse(v291), load(tbl, 48 * VECWIDTH + tbloffset), times(v291, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v1186 = minus(v305, v145); real2 v1190 = plus(v145, v305); real2 v459 = timesminusplus(reverse(v449), load(tbl, 78 * VECWIDTH + tbloffset), times(v449, load(tbl, 79 * VECWIDTH + tbloffset))); real2 v25 = load(in, 23 << inShift); real2 v57 = load(in, 55 << inShift); real2 v207 = reverse(minus(v57, v25)); real2 v213 = plus(v25, v57); real2 v9 = load(in, 7 << inShift); real2 v41 = load(in, 39 << inShift); real2 v212 = plus(v9, v41); real2 v208 = minus(v41, v9); real2 v528 = minus(v213, v212); real2 v532 = plus(v212, v213); real2 v209 = minusplus(v207, v208); real2 v211 = minusplus(uminus(v207), v208); real2 v225 = timesminusplus(reverse(v211), load(tbl, 32 * VECWIDTH + tbloffset), times(v211, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v219 = timesminusplus(reverse(v209), load(tbl, 30 * VECWIDTH + tbloffset), times(v209, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v17 = load(in, 15 << inShift); real2 v49 = load(in, 47 << inShift); real2 v368 = minus(v49, v17); real2 v372 = plus(v17, v49); real2 v33 = load(in, 31 << inShift); real2 v65 = load(in, 63 << inShift); real2 v367 = reverse(minus(v65, v33)); real2 v373 = plus(v33, v65); real2 v369 = minusplus(v367, v368); real2 v371 = minusplus(uminus(v367), v368); real2 v533 = plus(v372, v373); real2 v527 = reverse(minus(v373, v372)); real2 v607 = reverse(minus(v533, v532)); real2 v613 = plus(v532, v533); real2 v529 = minusplus(v527, v528); real2 v531 = minusplus(uminus(v527), v528); real2 v545 = timesminusplus(reverse(v531), 
load(tbl, 96 * VECWIDTH + tbloffset), times(v531, load(tbl, 97 * VECWIDTH + tbloffset))); real2 v653 = plus(v612, v613); real2 v647 = reverse(minus(v613, v612)); real2 v609 = minusplus(v607, v608); real2 v611 = minusplus(uminus(v607), v608); real2 v863 = plus(v465, v545); real2 v857 = reverse(minus(v545, v465)); real2 v539 = timesminusplus(reverse(v529), load(tbl, 94 * VECWIDTH + tbloffset), times(v529, load(tbl, 95 * VECWIDTH + tbloffset))); real2 v385 = timesminusplus(reverse(v371), load(tbl, 64 * VECWIDTH + tbloffset), times(v371, load(tbl, 65 * VECWIDTH + tbloffset))); real2 v619 = timesminusplus(reverse(v609), load(tbl, 110 * VECWIDTH + tbloffset), times(v609, load(tbl, 111 * VECWIDTH + tbloffset))); real2 v1191 = plus(v225, v385); real2 v1185 = reverse(minus(v385, v225)); real2 v779 = reverse(minus(v539, v459)); real2 v785 = plus(v459, v539); real2 v625 = timesminusplus(reverse(v611), load(tbl, 112 * VECWIDTH + tbloffset), times(v611, load(tbl, 113 * VECWIDTH + tbloffset))); real2 v379 = timesminusplus(reverse(v369), load(tbl, 62 * VECWIDTH + tbloffset), times(v369, load(tbl, 63 * VECWIDTH + tbloffset))); real2 v975 = reverse(minus(v379, v219)); real2 v981 = plus(v219, v379); real2 v977 = minusplus(v975, v976); real2 v979 = minusplus(uminus(v975), v976); real2 v987 = timesminusplus(reverse(v977), load(tbl, 170 * VECWIDTH + tbloffset), times(v977, load(tbl, 171 * VECWIDTH + tbloffset))); real2 v993 = timesminusplus(reverse(v979), load(tbl, 172 * VECWIDTH + tbloffset), times(v979, load(tbl, 173 * VECWIDTH + tbloffset))); real2 v1015 = reverse(minus(v981, v980)); real2 v1021 = plus(v980, v981); real2 v11 = load(in, 9 << inShift); real2 v43 = load(in, 41 << inShift); real2 v248 = minus(v43, v11); real2 v252 = plus(v11, v43); real2 v59 = load(in, 57 << inShift); real2 v27 = load(in, 25 << inShift); real2 v253 = plus(v27, v59); real2 v247 = reverse(minus(v59, v27)); real2 v413 = plus(v252, v253); real2 v407 = reverse(minus(v253, v252)); real2 v249 = minusplus(v247, 
v248); real2 v251 = minusplus(uminus(v247), v248); real2 v259 = timesminusplus(reverse(v249), load(tbl, 38 * VECWIDTH + tbloffset), times(v249, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v35 = load(in, 33 << inShift); real2 v3 = load(in, 1 << inShift); real2 v92 = plus(v3, v35); real2 v88 = minus(v35, v3); real2 v51 = load(in, 49 << inShift); real2 v19 = load(in, 17 << inShift); real2 v87 = reverse(minus(v51, v19)); real2 v93 = plus(v19, v51); real2 v412 = plus(v92, v93); real2 v408 = minus(v93, v92); real2 v411 = minusplus(uminus(v407), v408); real2 v409 = minusplus(v407, v408); real2 v91 = minusplus(uminus(v87), v88); real2 v89 = minusplus(v87, v88); real2 v99 = timesminusplus(reverse(v89), load(tbl, 6 * VECWIDTH + tbloffset), times(v89, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v425 = timesminusplus(reverse(v411), load(tbl, 72 * VECWIDTH + tbloffset), times(v411, load(tbl, 73 * VECWIDTH + tbloffset))); real2 v568 = minus(v413, v412); real2 v572 = plus(v412, v413); real2 v940 = plus(v99, v259); real2 v936 = minus(v259, v99); real2 v419 = timesminusplus(reverse(v409), load(tbl, 70 * VECWIDTH + tbloffset), times(v409, load(tbl, 71 * VECWIDTH + tbloffset))); real2 v47 = load(in, 45 << inShift); real2 v15 = load(in, 13 << inShift); real2 v332 = plus(v15, v47); real2 v328 = minus(v47, v15); real2 v63 = load(in, 61 << inShift); real2 v31 = load(in, 29 << inShift); real2 v327 = reverse(minus(v63, v31)); real2 v333 = plus(v31, v63); real2 v329 = minusplus(v327, v328); real2 v331 = minusplus(uminus(v327), v328); real2 v339 = timesminusplus(reverse(v329), load(tbl, 54 * VECWIDTH + tbloffset), times(v329, load(tbl, 55 * VECWIDTH + tbloffset))); real2 v487 = reverse(minus(v333, v332)); real2 v493 = plus(v332, v333); real2 v7 = load(in, 5 << inShift); real2 v39 = load(in, 37 << inShift); real2 v172 = plus(v7, v39); real2 v168 = minus(v39, v7); real2 v55 = load(in, 53 << inShift); real2 v23 = load(in, 21 << inShift); real2 v173 = plus(v23, v55); real2 v167 = 
reverse(minus(v55, v23)); real2 v488 = minus(v173, v172); real2 v492 = plus(v172, v173); real2 v491 = minusplus(uminus(v487), v488); real2 v489 = minusplus(v487, v488); real2 v499 = timesminusplus(reverse(v489), load(tbl, 86 * VECWIDTH + tbloffset), times(v489, load(tbl, 87 * VECWIDTH + tbloffset))); real2 v505 = timesminusplus(reverse(v491), load(tbl, 88 * VECWIDTH + tbloffset), times(v491, load(tbl, 89 * VECWIDTH + tbloffset))); real2 v567 = reverse(minus(v493, v492)); real2 v573 = plus(v492, v493); real2 v571 = minusplus(uminus(v567), v568); real2 v569 = minusplus(v567, v568); real2 v579 = timesminusplus(reverse(v569), load(tbl, 102 * VECWIDTH + tbloffset), times(v569, load(tbl, 103 * VECWIDTH + tbloffset))); real2 v585 = timesminusplus(reverse(v571), load(tbl, 104 * VECWIDTH + tbloffset), times(v571, load(tbl, 105 * VECWIDTH + tbloffset))); real2 v739 = plus(v585, v625); real2 v733 = reverse(minus(v625, v585)); real2 v707 = reverse(minus(v619, v579)); real2 v713 = plus(v579, v619); real2 v648 = minus(v573, v572); real2 v652 = plus(v572, v573); real2 v673 = plus(v652, v653); real2 v667 = reverse(minus(v653, v652)); real2 v651 = minusplus(uminus(v647), v648); real2 v649 = minusplus(v647, v648); real2 v659 = timesminusplus(reverse(v649), load(tbl, 118 * VECWIDTH + tbloffset), times(v649, load(tbl, 119 * VECWIDTH + tbloffset))); real2 v665 = timesminusplus(reverse(v651), load(tbl, 120 * VECWIDTH + tbloffset), times(v651, load(tbl, 121 * VECWIDTH + tbloffset))); real2 v780 = minus(v499, v419); real2 v784 = plus(v419, v499); real2 v781 = minusplus(v779, v780); real2 v783 = minusplus(uminus(v779), v780); real2 v805 = plus(v784, v785); real2 v799 = reverse(minus(v785, v784)); real2 v862 = plus(v425, v505); real2 v858 = minus(v505, v425); real2 v859 = minusplus(v857, v858); real2 v861 = minusplus(uminus(v857), v858); real2 v875 = timesminusplus(reverse(v861), load(tbl, 152 * VECWIDTH + tbloffset), times(v861, load(tbl, 153 * VECWIDTH + tbloffset))); real2 v791 = 
timesminusplus(reverse(v781), load(tbl, 138 * VECWIDTH + tbloffset), times(v781, load(tbl, 139 * VECWIDTH + tbloffset))); real2 v797 = timesminusplus(reverse(v783), load(tbl, 140 * VECWIDTH + tbloffset), times(v783, load(tbl, 141 * VECWIDTH + tbloffset))); real2 v883 = plus(v862, v863); real2 v877 = reverse(minus(v863, v862)); real2 v869 = timesminusplus(reverse(v859), load(tbl, 150 * VECWIDTH + tbloffset), times(v859, load(tbl, 151 * VECWIDTH + tbloffset))); real2 v36 = load(in, 34 << inShift); real2 v4 = load(in, 2 << inShift); real2 v108 = minus(v36, v4); real2 v112 = plus(v4, v36); real2 v52 = load(in, 50 << inShift); real2 v20 = load(in, 18 << inShift); real2 v113 = plus(v20, v52); real2 v107 = reverse(minus(v52, v20)); real2 v428 = minus(v113, v112); real2 v432 = plus(v112, v113); real2 v12 = load(in, 10 << inShift); real2 v44 = load(in, 42 << inShift); real2 v268 = minus(v44, v12); real2 v272 = plus(v12, v44); real2 v28 = load(in, 26 << inShift); real2 v60 = load(in, 58 << inShift); real2 v267 = reverse(minus(v60, v28)); real2 v273 = plus(v28, v60); real2 v427 = reverse(minus(v273, v272)); real2 v433 = plus(v272, v273); real2 v431 = minusplus(uminus(v427), v428); real2 v429 = minusplus(v427, v428); real2 v439 = timesminusplus(reverse(v429), load(tbl, 74 * VECWIDTH + tbloffset), times(v429, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v588 = minus(v433, v432); real2 v592 = plus(v432, v433); real2 v40 = load(in, 38 << inShift); real2 v8 = load(in, 6 << inShift); real2 v188 = minus(v40, v8); real2 v192 = plus(v8, v40); real2 v24 = load(in, 22 << inShift); real2 v56 = load(in, 54 << inShift); real2 v187 = reverse(minus(v56, v24)); real2 v193 = plus(v24, v56); real2 v512 = plus(v192, v193); real2 v508 = minus(v193, v192); real2 v32 = load(in, 30 << inShift); real2 v64 = load(in, 62 << inShift); real2 v347 = reverse(minus(v64, v32)); real2 v353 = plus(v32, v64); real2 v48 = load(in, 46 << inShift); real2 v16 = load(in, 14 << inShift); real2 v348 = minus(v48, 
v16); real2 v352 = plus(v16, v48); real2 v513 = plus(v352, v353); real2 v507 = reverse(minus(v353, v352)); real2 v587 = reverse(minus(v513, v512)); real2 v593 = plus(v512, v513); real2 v633 = plus(v592, v593); real2 v627 = reverse(minus(v593, v592)); real2 v591 = minusplus(uminus(v587), v588); real2 v589 = minusplus(v587, v588); real2 v605 = timesminusplus(reverse(v591), load(tbl, 108 * VECWIDTH + tbloffset), times(v591, load(tbl, 109 * VECWIDTH + tbloffset))); real2 v599 = timesminusplus(reverse(v589), load(tbl, 106 * VECWIDTH + tbloffset), times(v589, load(tbl, 107 * VECWIDTH + tbloffset))); real2 v46 = load(in, 44 << inShift); real2 v14 = load(in, 12 << inShift); real2 v312 = plus(v14, v46); real2 v308 = minus(v46, v14); real2 v62 = load(in, 60 << inShift); real2 v30 = load(in, 28 << inShift); real2 v313 = plus(v30, v62); real2 v307 = reverse(minus(v62, v30)); real2 v467 = reverse(minus(v313, v312)); real2 v473 = plus(v312, v313); real2 v22 = load(in, 20 << inShift); real2 v54 = load(in, 52 << inShift); real2 v147 = reverse(minus(v54, v22)); real2 v153 = plus(v22, v54); real2 v6 = load(in, 4 << inShift); real2 v38 = load(in, 36 << inShift); real2 v148 = minus(v38, v6); real2 v152 = plus(v6, v38); real2 v472 = plus(v152, v153); real2 v468 = minus(v153, v152); real2 v547 = reverse(minus(v473, v472)); real2 v553 = plus(v472, v473); real2 v10 = load(in, 8 << inShift); real2 v42 = load(in, 40 << inShift); real2 v232 = plus(v10, v42); real2 v228 = minus(v42, v10); real2 v58 = load(in, 56 << inShift); real2 v26 = load(in, 24 << inShift); real2 v233 = plus(v26, v58); real2 v227 = reverse(minus(v58, v26)); real2 v393 = plus(v232, v233); real2 v387 = reverse(minus(v233, v232)); real2 v2 = load(in, 0 << inShift); real2 v34 = load(in, 32 << inShift); real2 v72 = plus(v2, v34); real2 v68 = minus(v34, v2); real2 v18 = load(in, 16 << inShift); real2 v50 = load(in, 48 << inShift); real2 v73 = plus(v18, v50); real2 v67 = reverse(minus(v50, v18)); real2 v388 = minus(v73, v72); 
real2 v392 = plus(v72, v73); real2 v548 = minus(v393, v392); real2 v552 = plus(v392, v393); real2 v628 = minus(v553, v552); real2 v632 = plus(v552, v553); real2 v672 = plus(v632, v633); real2 v668 = minus(v633, v632); scatter(out, 0, 64, plus(v672, v673)); real2 v686 = minus(v672, v673); scatter(out, 32, 64, timesminusplus(v686, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v686), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v669 = minusplus(v667, v668); real2 v671 = minusplus(uminus(v667), v668); scatter(out, 48, 64, timesminusplus(reverse(v671), load(tbl, 124 * VECWIDTH + tbloffset), times(v671, load(tbl, 125 * VECWIDTH + tbloffset)))); scatter(out, 16, 64, timesminusplus(reverse(v669), load(tbl, 122 * VECWIDTH + tbloffset), times(v669, load(tbl, 123 * VECWIDTH + tbloffset)))); real2 v631 = minusplus(uminus(v627), v628); real2 v629 = minusplus(v627, v628); real2 v639 = timesminusplus(reverse(v629), load(tbl, 114 * VECWIDTH + tbloffset), times(v629, load(tbl, 115 * VECWIDTH + tbloffset))); scatter(out, 8, 64, plus(v639, v659)); real2 v694 = minus(v639, v659); scatter(out, 40, 64, timesminusplus(v694, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v694), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v645 = timesminusplus(reverse(v631), load(tbl, 116 * VECWIDTH + tbloffset), times(v631, load(tbl, 117 * VECWIDTH + tbloffset))); scatter(out, 24, 64, plus(v645, v665)); real2 v700 = minus(v645, v665); scatter(out, 56, 64, timesminusplus(v700, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v700), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v549 = minusplus(v547, v548); real2 v551 = minusplus(uminus(v547), v548); real2 v559 = timesminusplus(reverse(v549), load(tbl, 98 * VECWIDTH + tbloffset), times(v549, load(tbl, 99 * VECWIDTH + tbloffset))); real2 v708 = minus(v599, v559); real2 v712 = plus(v559, v599); scatter(out, 4, 64, plus(v712, v713)); real2 v726 = minus(v712, v713); scatter(out, 36, 64, timesminusplus(v726, load(tbl, 0 * VECWIDTH + tbloffset), 
times(reverse(v726), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v711 = minusplus(uminus(v707), v708); real2 v709 = minusplus(v707, v708); scatter(out, 20, 64, timesminusplus(reverse(v709), load(tbl, 126 * VECWIDTH + tbloffset), times(v709, load(tbl, 127 * VECWIDTH + tbloffset)))); scatter(out, 52, 64, timesminusplus(reverse(v711), load(tbl, 128 * VECWIDTH + tbloffset), times(v711, load(tbl, 129 * VECWIDTH + tbloffset)))); real2 v565 = timesminusplus(reverse(v551), load(tbl, 100 * VECWIDTH + tbloffset), times(v551, load(tbl, 101 * VECWIDTH + tbloffset))); real2 v738 = plus(v565, v605); real2 v734 = minus(v605, v565); scatter(out, 12, 64, plus(v738, v739)); real2 v752 = minus(v738, v739); scatter(out, 44, 64, timesminusplus(v752, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v752), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v737 = minusplus(uminus(v733), v734); scatter(out, 60, 64, timesminusplus(reverse(v737), load(tbl, 132 * VECWIDTH + tbloffset), times(v737, load(tbl, 133 * VECWIDTH + tbloffset)))); real2 v735 = minusplus(v733, v734); scatter(out, 28, 64, timesminusplus(reverse(v735), load(tbl, 130 * VECWIDTH + tbloffset), times(v735, load(tbl, 131 * VECWIDTH + tbloffset)))); real2 v471 = minusplus(uminus(v467), v468); real2 v469 = minusplus(v467, v468); real2 v479 = timesminusplus(reverse(v469), load(tbl, 82 * VECWIDTH + tbloffset), times(v469, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v511 = minusplus(uminus(v507), v508); real2 v509 = minusplus(v507, v508); real2 v519 = timesminusplus(reverse(v509), load(tbl, 90 * VECWIDTH + tbloffset), times(v509, load(tbl, 91 * VECWIDTH + tbloffset))); real2 v765 = plus(v439, v519); real2 v759 = reverse(minus(v519, v439)); real2 v389 = minusplus(v387, v388); real2 v391 = minusplus(uminus(v387), v388); real2 v399 = timesminusplus(reverse(v389), load(tbl, 66 * VECWIDTH + tbloffset), times(v389, load(tbl, 67 * VECWIDTH + tbloffset))); real2 v764 = plus(v399, v479); real2 v760 = minus(v479, v399); real2 v804 = 
plus(v764, v765); real2 v800 = minus(v765, v764); scatter(out, 2, 64, plus(v804, v805)); real2 v818 = minus(v804, v805); scatter(out, 34, 64, timesminusplus(v818, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v818), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v803 = minusplus(uminus(v799), v800); scatter(out, 50, 64, timesminusplus(reverse(v803), load(tbl, 144 * VECWIDTH + tbloffset), times(v803, load(tbl, 145 * VECWIDTH + tbloffset)))); real2 v801 = minusplus(v799, v800); scatter(out, 18, 64, timesminusplus(reverse(v801), load(tbl, 142 * VECWIDTH + tbloffset), times(v801, load(tbl, 143 * VECWIDTH + tbloffset)))); real2 v763 = minusplus(uminus(v759), v760); real2 v761 = minusplus(v759, v760); real2 v777 = timesminusplus(reverse(v763), load(tbl, 136 * VECWIDTH + tbloffset), times(v763, load(tbl, 137 * VECWIDTH + tbloffset))); scatter(out, 26, 64, plus(v777, v797)); real2 v830 = minus(v777, v797); scatter(out, 58, 64, timesminusplus(v830, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v830), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v771 = timesminusplus(reverse(v761), load(tbl, 134 * VECWIDTH + tbloffset), times(v761, load(tbl, 135 * VECWIDTH + tbloffset))); scatter(out, 10, 64, plus(v771, v791)); real2 v824 = minus(v771, v791); scatter(out, 42, 64, timesminusplus(v824, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v824), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v445 = timesminusplus(reverse(v431), load(tbl, 76 * VECWIDTH + tbloffset), times(v431, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v525 = timesminusplus(reverse(v511), load(tbl, 92 * VECWIDTH + tbloffset), times(v511, load(tbl, 93 * VECWIDTH + tbloffset))); real2 v837 = reverse(minus(v525, v445)); real2 v843 = plus(v445, v525); real2 v485 = timesminusplus(reverse(v471), load(tbl, 84 * VECWIDTH + tbloffset), times(v471, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v405 = timesminusplus(reverse(v391), load(tbl, 68 * VECWIDTH + tbloffset), times(v391, load(tbl, 69 * VECWIDTH + 
tbloffset))); real2 v838 = minus(v485, v405); real2 v842 = plus(v405, v485); real2 v878 = minus(v843, v842); real2 v882 = plus(v842, v843); scatter(out, 6, 64, plus(v882, v883)); real2 v896 = minus(v882, v883); scatter(out, 38, 64, timesminusplus(v896, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v896), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v881 = minusplus(uminus(v877), v878); scatter(out, 54, 64, timesminusplus(reverse(v881), load(tbl, 156 * VECWIDTH + tbloffset), times(v881, load(tbl, 157 * VECWIDTH + tbloffset)))); real2 v879 = minusplus(v877, v878); scatter(out, 22, 64, timesminusplus(reverse(v879), load(tbl, 154 * VECWIDTH + tbloffset), times(v879, load(tbl, 155 * VECWIDTH + tbloffset)))); real2 v841 = minusplus(uminus(v837), v838); real2 v839 = minusplus(v837, v838); real2 v855 = timesminusplus(reverse(v841), load(tbl, 148 * VECWIDTH + tbloffset), times(v841, load(tbl, 149 * VECWIDTH + tbloffset))); scatter(out, 30, 64, plus(v855, v875)); real2 v908 = minus(v855, v875); scatter(out, 62, 64, timesminusplus(v908, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v908), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v849 = timesminusplus(reverse(v839), load(tbl, 146 * VECWIDTH + tbloffset), times(v839, load(tbl, 147 * VECWIDTH + tbloffset))); scatter(out, 14, 64, plus(v849, v869)); real2 v902 = minus(v849, v869); scatter(out, 46, 64, timesminusplus(v902, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v902), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v151 = minusplus(uminus(v147), v148); real2 v149 = minusplus(v147, v148); real2 v311 = minusplus(uminus(v307), v308); real2 v309 = minusplus(v307, v308); real2 v109 = minusplus(v107, v108); real2 v111 = minusplus(uminus(v107), v108); real2 v119 = timesminusplus(reverse(v109), load(tbl, 10 * VECWIDTH + tbloffset), times(v109, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v269 = minusplus(v267, v268); real2 v271 = minusplus(uminus(v267), v268); real2 v279 = timesminusplus(reverse(v269), load(tbl, 
42 * VECWIDTH + tbloffset), times(v269, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v960 = plus(v119, v279); real2 v956 = minus(v279, v119); real2 v169 = minusplus(v167, v168); real2 v171 = minusplus(uminus(v167), v168); real2 v159 = timesminusplus(reverse(v149), load(tbl, 18 * VECWIDTH + tbloffset), times(v149, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v319 = timesminusplus(reverse(v309), load(tbl, 50 * VECWIDTH + tbloffset), times(v309, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v921 = plus(v159, v319); real2 v915 = reverse(minus(v319, v159)); real2 v351 = minusplus(uminus(v347), v348); real2 v349 = minusplus(v347, v348); real2 v359 = timesminusplus(reverse(v349), load(tbl, 58 * VECWIDTH + tbloffset), times(v349, load(tbl, 59 * VECWIDTH + tbloffset))); real2 v191 = minusplus(uminus(v187), v188); real2 v189 = minusplus(v187, v188); real2 v199 = timesminusplus(reverse(v189), load(tbl, 26 * VECWIDTH + tbloffset), times(v189, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v961 = plus(v199, v359); real2 v955 = reverse(minus(v359, v199)); real2 v995 = reverse(minus(v961, v960)); real2 v1001 = plus(v960, v961); real2 v179 = timesminusplus(reverse(v169), load(tbl, 22 * VECWIDTH + tbloffset), times(v169, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v941 = plus(v179, v339); real2 v935 = reverse(minus(v339, v179)); real2 v1016 = minus(v941, v940); real2 v1020 = plus(v940, v941); real2 v71 = minusplus(uminus(v67), v68); real2 v69 = minusplus(v67, v68); real2 v79 = timesminusplus(reverse(v69), load(tbl, 2 * VECWIDTH + tbloffset), times(v69, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v1041 = plus(v1020, v1021); real2 v1035 = reverse(minus(v1021, v1020)); real2 v229 = minusplus(v227, v228); real2 v231 = minusplus(uminus(v227), v228); real2 v239 = timesminusplus(reverse(v229), load(tbl, 34 * VECWIDTH + tbloffset), times(v229, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v920 = plus(v79, v239); real2 v916 = minus(v239, v79); real2 v996 = minus(v921, v920); real2 v1000 
= plus(v920, v921); real2 v1040 = plus(v1000, v1001); real2 v1036 = minus(v1001, v1000); scatter(out, 1, 64, plus(v1040, v1041)); real2 v1054 = minus(v1040, v1041); scatter(out, 33, 64, timesminusplus(v1054, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1054), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1037 = minusplus(v1035, v1036); real2 v1039 = minusplus(uminus(v1035), v1036); scatter(out, 49, 64, timesminusplus(reverse(v1039), load(tbl, 184 * VECWIDTH + tbloffset), times(v1039, load(tbl, 185 * VECWIDTH + tbloffset)))); scatter(out, 17, 64, timesminusplus(reverse(v1037), load(tbl, 182 * VECWIDTH + tbloffset), times(v1037, load(tbl, 183 * VECWIDTH + tbloffset)))); real2 v1017 = minusplus(v1015, v1016); real2 v1019 = minusplus(uminus(v1015), v1016); real2 v1033 = timesminusplus(reverse(v1019), load(tbl, 180 * VECWIDTH + tbloffset), times(v1019, load(tbl, 181 * VECWIDTH + tbloffset))); real2 v997 = minusplus(v995, v996); real2 v999 = minusplus(uminus(v995), v996); real2 v1013 = timesminusplus(reverse(v999), load(tbl, 176 * VECWIDTH + tbloffset), times(v999, load(tbl, 177 * VECWIDTH + tbloffset))); scatter(out, 25, 64, plus(v1013, v1033)); real2 v1066 = minus(v1013, v1033); scatter(out, 57, 64, timesminusplus(v1066, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1066), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1027 = timesminusplus(reverse(v1017), load(tbl, 178 * VECWIDTH + tbloffset), times(v1017, load(tbl, 179 * VECWIDTH + tbloffset))); real2 v1007 = timesminusplus(reverse(v997), load(tbl, 174 * VECWIDTH + tbloffset), times(v997, load(tbl, 175 * VECWIDTH + tbloffset))); scatter(out, 9, 64, plus(v1007, v1027)); real2 v1060 = minus(v1007, v1027); scatter(out, 41, 64, timesminusplus(v1060, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1060), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v937 = minusplus(v935, v936); real2 v939 = minusplus(uminus(v935), v936); real2 v959 = minusplus(uminus(v955), v956); real2 v957 = minusplus(v955, v956); 
real2 v967 = timesminusplus(reverse(v957), load(tbl, 166 * VECWIDTH + tbloffset), times(v957, load(tbl, 167 * VECWIDTH + tbloffset))); real2 v947 = timesminusplus(reverse(v937), load(tbl, 162 * VECWIDTH + tbloffset), times(v937, load(tbl, 163 * VECWIDTH + tbloffset))); real2 v919 = minusplus(uminus(v915), v916); real2 v917 = minusplus(v915, v916); real2 v1079 = plus(v947, v987); real2 v1073 = reverse(minus(v987, v947)); real2 v927 = timesminusplus(reverse(v917), load(tbl, 158 * VECWIDTH + tbloffset), times(v917, load(tbl, 159 * VECWIDTH + tbloffset))); real2 v1074 = minus(v967, v927); real2 v1078 = plus(v927, v967); scatter(out, 5, 64, plus(v1078, v1079)); real2 v1092 = minus(v1078, v1079); scatter(out, 37, 64, timesminusplus(v1092, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1092), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1075 = minusplus(v1073, v1074); scatter(out, 21, 64, timesminusplus(reverse(v1075), load(tbl, 186 * VECWIDTH + tbloffset), times(v1075, load(tbl, 187 * VECWIDTH + tbloffset)))); real2 v1077 = minusplus(uminus(v1073), v1074); scatter(out, 53, 64, timesminusplus(reverse(v1077), load(tbl, 188 * VECWIDTH + tbloffset), times(v1077, load(tbl, 189 * VECWIDTH + tbloffset)))); real2 v953 = timesminusplus(reverse(v939), load(tbl, 164 * VECWIDTH + tbloffset), times(v939, load(tbl, 165 * VECWIDTH + tbloffset))); real2 v1099 = reverse(minus(v993, v953)); real2 v1105 = plus(v953, v993); real2 v973 = timesminusplus(reverse(v959), load(tbl, 168 * VECWIDTH + tbloffset), times(v959, load(tbl, 169 * VECWIDTH + tbloffset))); real2 v933 = timesminusplus(reverse(v919), load(tbl, 160 * VECWIDTH + tbloffset), times(v919, load(tbl, 161 * VECWIDTH + tbloffset))); real2 v1104 = plus(v933, v973); real2 v1100 = minus(v973, v933); scatter(out, 13, 64, plus(v1104, v1105)); real2 v1118 = minus(v1104, v1105); scatter(out, 45, 64, timesminusplus(v1118, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1118), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1101 = 
minusplus(v1099, v1100); scatter(out, 29, 64, timesminusplus(reverse(v1101), load(tbl, 190 * VECWIDTH + tbloffset), times(v1101, load(tbl, 191 * VECWIDTH + tbloffset)))); real2 v1103 = minusplus(uminus(v1099), v1100); scatter(out, 61, 64, timesminusplus(reverse(v1103), load(tbl, 192 * VECWIDTH + tbloffset), times(v1103, load(tbl, 193 * VECWIDTH + tbloffset)))); real2 v345 = timesminusplus(reverse(v331), load(tbl, 56 * VECWIDTH + tbloffset), times(v331, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v325 = timesminusplus(reverse(v311), load(tbl, 52 * VECWIDTH + tbloffset), times(v311, load(tbl, 53 * VECWIDTH + tbloffset))); real2 v265 = timesminusplus(reverse(v251), load(tbl, 40 * VECWIDTH + tbloffset), times(v251, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v185 = timesminusplus(reverse(v171), load(tbl, 24 * VECWIDTH + tbloffset), times(v171, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v165 = timesminusplus(reverse(v151), load(tbl, 20 * VECWIDTH + tbloffset), times(v151, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v1131 = plus(v165, v325); real2 v1125 = reverse(minus(v325, v165)); real2 v1151 = plus(v185, v345); real2 v1145 = reverse(minus(v345, v185)); real2 v105 = timesminusplus(reverse(v91), load(tbl, 8 * VECWIDTH + tbloffset), times(v91, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v1150 = plus(v105, v265); real2 v1146 = minus(v265, v105); real2 v1226 = minus(v1151, v1150); real2 v1230 = plus(v1150, v1151); real2 v1231 = plus(v1190, v1191); real2 v1225 = reverse(minus(v1191, v1190)); real2 v1245 = reverse(minus(v1231, v1230)); real2 v1251 = plus(v1230, v1231); real2 v365 = timesminusplus(reverse(v351), load(tbl, 60 * VECWIDTH + tbloffset), times(v351, load(tbl, 61 * VECWIDTH + tbloffset))); real2 v285 = timesminusplus(reverse(v271), load(tbl, 44 * VECWIDTH + tbloffset), times(v271, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v205 = timesminusplus(reverse(v191), load(tbl, 28 * VECWIDTH + tbloffset), times(v191, load(tbl, 29 * VECWIDTH + tbloffset))); 
real2 v1171 = plus(v205, v365); real2 v1165 = reverse(minus(v365, v205)); real2 v125 = timesminusplus(reverse(v111), load(tbl, 12 * VECWIDTH + tbloffset), times(v111, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v85 = timesminusplus(reverse(v71), load(tbl, 4 * VECWIDTH + tbloffset), times(v71, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v245 = timesminusplus(reverse(v231), load(tbl, 36 * VECWIDTH + tbloffset), times(v231, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v1126 = minus(v245, v85); real2 v1130 = plus(v85, v245); real2 v1210 = plus(v1130, v1131); real2 v1206 = minus(v1131, v1130); real2 v1166 = minus(v285, v125); real2 v1170 = plus(v125, v285); real2 v1211 = plus(v1170, v1171); real2 v1205 = reverse(minus(v1171, v1170)); real2 v1246 = minus(v1211, v1210); real2 v1250 = plus(v1210, v1211); scatter(out, 3, 64, plus(v1250, v1251)); real2 v1264 = minus(v1250, v1251); scatter(out, 35, 64, timesminusplus(v1264, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1264), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1247 = minusplus(v1245, v1246); real2 v1249 = minusplus(uminus(v1245), v1246); scatter(out, 19, 64, timesminusplus(reverse(v1247), load(tbl, 218 * VECWIDTH + tbloffset), times(v1247, load(tbl, 219 * VECWIDTH + tbloffset)))); scatter(out, 51, 64, timesminusplus(reverse(v1249), load(tbl, 220 * VECWIDTH + tbloffset), times(v1249, load(tbl, 221 * VECWIDTH + tbloffset)))); real2 v1229 = minusplus(uminus(v1225), v1226); real2 v1227 = minusplus(v1225, v1226); real2 v1207 = minusplus(v1205, v1206); real2 v1209 = minusplus(uminus(v1205), v1206); real2 v1237 = timesminusplus(reverse(v1227), load(tbl, 214 * VECWIDTH + tbloffset), times(v1227, load(tbl, 215 * VECWIDTH + tbloffset))); real2 v1217 = timesminusplus(reverse(v1207), load(tbl, 210 * VECWIDTH + tbloffset), times(v1207, load(tbl, 211 * VECWIDTH + tbloffset))); scatter(out, 11, 64, plus(v1217, v1237)); real2 v1270 = minus(v1217, v1237); scatter(out, 43, 64, timesminusplus(v1270, load(tbl, 0 * VECWIDTH 
+ tbloffset), times(reverse(v1270), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1223 = timesminusplus(reverse(v1209), load(tbl, 212 * VECWIDTH + tbloffset), times(v1209, load(tbl, 213 * VECWIDTH + tbloffset))); real2 v1243 = timesminusplus(reverse(v1229), load(tbl, 216 * VECWIDTH + tbloffset), times(v1229, load(tbl, 217 * VECWIDTH + tbloffset))); scatter(out, 27, 64, plus(v1223, v1243)); real2 v1276 = minus(v1223, v1243); scatter(out, 59, 64, timesminusplus(v1276, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1276), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1189 = minusplus(uminus(v1185), v1186); real2 v1187 = minusplus(v1185, v1186); real2 v1129 = minusplus(uminus(v1125), v1126); real2 v1127 = minusplus(v1125, v1126); real2 v1147 = minusplus(v1145, v1146); real2 v1149 = minusplus(uminus(v1145), v1146); real2 v1167 = minusplus(v1165, v1166); real2 v1169 = minusplus(uminus(v1165), v1166); real2 v1143 = timesminusplus(reverse(v1129), load(tbl, 196 * VECWIDTH + tbloffset), times(v1129, load(tbl, 197 * VECWIDTH + tbloffset))); real2 v1163 = timesminusplus(reverse(v1149), load(tbl, 200 * VECWIDTH + tbloffset), times(v1149, load(tbl, 201 * VECWIDTH + tbloffset))); real2 v1203 = timesminusplus(reverse(v1189), load(tbl, 208 * VECWIDTH + tbloffset), times(v1189, load(tbl, 209 * VECWIDTH + tbloffset))); real2 v1315 = plus(v1163, v1203); real2 v1309 = reverse(minus(v1203, v1163)); real2 v1183 = timesminusplus(reverse(v1169), load(tbl, 204 * VECWIDTH + tbloffset), times(v1169, load(tbl, 205 * VECWIDTH + tbloffset))); real2 v1314 = plus(v1143, v1183); real2 v1310 = minus(v1183, v1143); scatter(out, 15, 64, plus(v1314, v1315)); real2 v1328 = minus(v1314, v1315); scatter(out, 47, 64, timesminusplus(v1328, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1328), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1311 = minusplus(v1309, v1310); scatter(out, 31, 64, timesminusplus(reverse(v1311), load(tbl, 226 * VECWIDTH + tbloffset), times(v1311, load(tbl, 227 * 
VECWIDTH + tbloffset)))); real2 v1313 = minusplus(uminus(v1309), v1310); scatter(out, 63, 64, timesminusplus(reverse(v1313), load(tbl, 228 * VECWIDTH + tbloffset), times(v1313, load(tbl, 229 * VECWIDTH + tbloffset)))); real2 v1177 = timesminusplus(reverse(v1167), load(tbl, 202 * VECWIDTH + tbloffset), times(v1167, load(tbl, 203 * VECWIDTH + tbloffset))); real2 v1137 = timesminusplus(reverse(v1127), load(tbl, 194 * VECWIDTH + tbloffset), times(v1127, load(tbl, 195 * VECWIDTH + tbloffset))); real2 v1197 = timesminusplus(reverse(v1187), load(tbl, 206 * VECWIDTH + tbloffset), times(v1187, load(tbl, 207 * VECWIDTH + tbloffset))); real2 v1157 = timesminusplus(reverse(v1147), load(tbl, 198 * VECWIDTH + tbloffset), times(v1147, load(tbl, 199 * VECWIDTH + tbloffset))); real2 v1283 = reverse(minus(v1197, v1157)); real2 v1289 = plus(v1157, v1197); real2 v1288 = plus(v1137, v1177); real2 v1284 = minus(v1177, v1137); scatter(out, 7, 64, plus(v1288, v1289)); real2 v1302 = minus(v1288, v1289); scatter(out, 39, 64, timesminusplus(v1302, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1302), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1285 = minusplus(v1283, v1284); real2 v1287 = minusplus(uminus(v1283), v1284); scatter(out, 55, 64, timesminusplus(reverse(v1287), load(tbl, 224 * VECWIDTH + tbloffset), times(v1287, load(tbl, 225 * VECWIDTH + tbloffset)))); scatter(out, 23, 64, timesminusplus(reverse(v1285), load(tbl, 222 * VECWIDTH + tbloffset), times(v1285, load(tbl, 223 * VECWIDTH + tbloffset)))); // Pres : 17339 } } #endif // #if MAXBUTWIDTH >= 7 ALIGNED(8192) void dft128f_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; // Pres : 129041 real2 v109 = load(in, 107 << shift); real2 v45 = load(in, 43 << shift); real2 v341 = plus(v45, v109); real2 v335 = 
reverse(minus(v45, v109)); real2 v77 = load(in, 75 << shift); real2 v13 = load(in, 11 << shift); real2 v340 = plus(v13, v77); real2 v336 = minus(v77, v13); real2 v337 = minusplus(v335, v336); real2 v339 = minusplus(uminus(v335), v336); real2 v350 = ctimesminusplus(reverse(v339), ctbl[61], ctimes(v339, ctbl[60])); real2 v886 = plus(v340, v341); real2 v882 = minus(v341, v340); real2 v345 = ctimesminusplus(reverse(v337), ctbl[51], ctimes(v337, ctbl[41])); real2 v125 = load(in, 123 << shift); real2 v61 = load(in, 59 << shift); real2 v621 = plus(v61, v125); real2 v615 = reverse(minus(v61, v125)); real2 v29 = load(in, 27 << shift); real2 v93 = load(in, 91 << shift); real2 v616 = minus(v93, v29); real2 v620 = plus(v29, v93); real2 v887 = plus(v620, v621); real2 v881 = reverse(minus(v620, v621)); real2 v1009 = reverse(minus(v886, v887)); real2 v1015 = plus(v886, v887); real2 v883 = minusplus(v881, v882); real2 v885 = minusplus(uminus(v881), v882); real2 v895 = ctimesminusplus(reverse(v885), ctbl[28], ctimes(v885, ctbl[14])); real2 v619 = minusplus(uminus(v615), v616); real2 v617 = minusplus(v615, v616); real2 v625 = ctimesminusplus(reverse(v617), ctbl[53], ctimes(v617, ctbl[39])); real2 v891 = ctimesminusplus(reverse(v883), ctbl[25], ctimes(v883, ctbl[19])); real2 v631 = ctimesminusplus(reverse(v619), ctbl[58], ctimes(v619, ctbl[32])); real2 v2129 = reverse(minus(v350, v631)); real2 v2135 = plus(v350, v631); real2 v1728 = reverse(minus(v345, v625)); real2 v1734 = plus(v345, v625); real2 v5 = load(in, 3 << shift); real2 v69 = load(in, 67 << shift); real2 v192 = plus(v5, v69); real2 v188 = minus(v69, v5); real2 v37 = load(in, 35 << shift); real2 v101 = load(in, 99 << shift); real2 v193 = plus(v37, v101); real2 v187 = reverse(minus(v37, v101)); real2 v758 = plus(v192, v193); real2 v754 = minus(v193, v192); real2 v189 = minusplus(v187, v188); real2 v191 = minusplus(uminus(v187), v188); real2 v203 = ctimesminusplus(reverse(v191), ctbl[35], ctimes(v191, ctbl[57])); real2 v197 = 
ctimesminusplus(reverse(v189), ctbl[47], ctimes(v189, ctbl[45])); real2 v53 = load(in, 51 << shift); real2 v117 = load(in, 115 << shift); real2 v474 = reverse(minus(v53, v117)); real2 v480 = plus(v53, v117); real2 v85 = load(in, 83 << shift); real2 v21 = load(in, 19 << shift); real2 v475 = minus(v85, v21); real2 v479 = plus(v21, v85); real2 v753 = reverse(minus(v479, v480)); real2 v759 = plus(v479, v480); real2 v755 = minusplus(v753, v754); real2 v757 = minusplus(uminus(v753), v754); real2 v767 = ctimesminusplus(reverse(v757), ctbl[17], ctimes(v757, ctbl[27])); real2 v763 = ctimesminusplus(reverse(v755), ctbl[23], ctimes(v755, ctbl[21])); real2 v1411 = minus(v891, v763); real2 v1415 = plus(v763, v891); real2 v1014 = plus(v758, v759); real2 v1010 = minus(v759, v758); real2 v1011 = minusplus(v1009, v1010); real2 v1013 = minusplus(uminus(v1009), v1010); real2 v1023 = ctimesminusplus(reverse(v1013), ctbl[13], ctimes(v1013, ctbl[12])); real2 v1570 = minus(v895, v767); real2 v1574 = plus(v767, v895); real2 v1142 = plus(v1014, v1015); real2 v1138 = minus(v1015, v1014); real2 v478 = minusplus(uminus(v474), v475); real2 v476 = minusplus(v474, v475); real2 v484 = ctimesminusplus(reverse(v476), ctbl[49], ctimes(v476, ctbl[43])); real2 v1733 = plus(v197, v484); real2 v1729 = minus(v484, v197); real2 v1861 = plus(v1733, v1734); real2 v1857 = minus(v1734, v1733); real2 v1730 = minusplus(v1728, v1729); real2 v1732 = minusplus(uminus(v1728), v1729); real2 v1738 = ctimesminusplus(reverse(v1730), ctbl[11], ctimes(v1730, ctbl[9])); real2 v489 = ctimesminusplus(reverse(v478), ctbl[55], ctimes(v478, ctbl[54])); real2 v1742 = ctimesminusplus(reverse(v1732), ctbl[13], ctimes(v1732, ctbl[12])); real2 v1019 = ctimesminusplus(reverse(v1011), ctbl[11], ctimes(v1011, ctbl[9])); real2 v2134 = plus(v203, v489); real2 v2130 = minus(v489, v203); real2 v2262 = plus(v2134, v2135); real2 v2258 = minus(v2135, v2134); real2 v105 = load(in, 103 << shift); real2 v41 = load(in, 39 << shift); real2 v269 = 
plus(v41, v105); real2 v263 = reverse(minus(v41, v105)); real2 v9 = load(in, 7 << shift); real2 v73 = load(in, 71 << shift); real2 v264 = minus(v73, v9); real2 v268 = plus(v9, v73); real2 v822 = plus(v268, v269); real2 v818 = minus(v269, v268); real2 v265 = minusplus(v263, v264); real2 v267 = minusplus(uminus(v263), v264); real2 v275 = ctimesminusplus(reverse(v265), ctbl[55], ctimes(v265, ctbl[37])); real2 v281 = ctimesminusplus(reverse(v267), ctbl[41], ctimes(v267, ctbl[51])); real2 v121 = load(in, 119 << shift); real2 v57 = load(in, 55 << shift); real2 v549 = plus(v57, v121); real2 v543 = reverse(minus(v57, v121)); real2 v25 = load(in, 23 << shift); real2 v89 = load(in, 87 << shift); real2 v544 = minus(v89, v25); real2 v548 = plus(v25, v89); real2 v817 = reverse(minus(v548, v549)); real2 v823 = plus(v548, v549); real2 v819 = minusplus(v817, v818); real2 v821 = minusplus(uminus(v817), v818); real2 v547 = minusplus(uminus(v543), v544); real2 v545 = minusplus(v543, v544); real2 v553 = ctimesminusplus(reverse(v545), ctbl[57], ctimes(v545, ctbl[35])); real2 v827 = ctimesminusplus(reverse(v819), ctbl[27], ctimes(v819, ctbl[17])); real2 v831 = ctimesminusplus(reverse(v821), ctbl[25], ctimes(v821, ctbl[24])); real2 v559 = ctimesminusplus(reverse(v547), ctbl[52], ctimes(v547, ctbl[38])); real2 v2198 = plus(v281, v559); real2 v2194 = minus(v559, v281); real2 v1793 = minus(v553, v275); real2 v1797 = plus(v275, v553); real2 v1078 = plus(v822, v823); real2 v1074 = minus(v823, v822); real2 v129 = load(in, 127 << shift); real2 v65 = load(in, 63 << shift); real2 v693 = plus(v65, v129); real2 v687 = reverse(minus(v65, v129)); real2 v33 = load(in, 31 << shift); real2 v97 = load(in, 95 << shift); real2 v692 = plus(v33, v97); real2 v688 = minus(v97, v33); real2 v691 = minusplus(uminus(v687), v688); real2 v689 = minusplus(v687, v688); real2 v945 = reverse(minus(v692, v693)); real2 v951 = plus(v692, v693); real2 v697 = ctimesminusplus(reverse(v689), ctbl[61], ctimes(v689, ctbl[31])); 
real2 v703 = ctimesminusplus(reverse(v691), ctbl[46], ctimes(v691, ctbl[44])); real2 v81 = load(in, 79 << shift); real2 v17 = load(in, 15 << shift); real2 v406 = minus(v81, v17); real2 v410 = plus(v17, v81); real2 v49 = load(in, 47 << shift); real2 v113 = load(in, 111 << shift); real2 v405 = reverse(minus(v49, v113)); real2 v411 = plus(v49, v113); real2 v407 = minusplus(v405, v406); real2 v409 = minusplus(uminus(v405), v406); real2 v415 = ctimesminusplus(reverse(v407), ctbl[59], ctimes(v407, ctbl[33])); real2 v1798 = plus(v415, v697); real2 v1792 = reverse(minus(v415, v697)); real2 v950 = plus(v410, v411); real2 v946 = minus(v411, v410); real2 v949 = minusplus(uminus(v945), v946); real2 v947 = minusplus(v945, v946); real2 v1073 = reverse(minus(v950, v951)); real2 v1079 = plus(v950, v951); real2 v955 = ctimesminusplus(reverse(v947), ctbl[29], ctimes(v947, ctbl[15])); real2 v1410 = reverse(minus(v827, v955)); real2 v1416 = plus(v827, v955); real2 v1448 = plus(v1415, v1416); real2 v1442 = reverse(minus(v1415, v1416)); real2 v1412 = minusplus(v1410, v1411); real2 v1414 = minusplus(uminus(v1410), v1411); real2 v1424 = ctimesminusplus(reverse(v1414), ctbl[4], ctimes(v1414, ctbl[2])); real2 v1077 = minusplus(uminus(v1073), v1074); real2 v1075 = minusplus(v1073, v1074); real2 v1087 = ctimesminusplus(reverse(v1077), ctbl[10], ctimes(v1077, ctbl[8])); real2 v1327 = plus(v1023, v1087); real2 v1321 = reverse(minus(v1023, v1087)); real2 v1137 = reverse(minus(v1078, v1079)); real2 v1143 = plus(v1078, v1079); real2 v1169 = reverse(minus(v1142, v1143)); real2 v1175 = plus(v1142, v1143); real2 v1083 = ctimesminusplus(reverse(v1075), ctbl[13], ctimes(v1075, ctbl[7])); real2 v1796 = minusplus(uminus(v1792), v1793); real2 v1794 = minusplus(v1792, v1793); real2 v1806 = ctimesminusplus(reverse(v1796), ctbl[10], ctimes(v1796, ctbl[8])); real2 v2046 = plus(v1742, v1806); real2 v2040 = reverse(minus(v1742, v1806)); real2 v1270 = plus(v1019, v1083); real2 v1264 = reverse(minus(v1019, 
v1083)); real2 v959 = ctimesminusplus(reverse(v949), ctbl[22], ctimes(v949, ctbl[20])); real2 v1139 = minusplus(v1137, v1138); real2 v1141 = minusplus(uminus(v1137), v1138); real2 v1151 = ctimesminusplus(reverse(v1141), ctbl[4], ctimes(v1141, ctbl[2])); real2 v1420 = ctimesminusplus(reverse(v1412), ctbl[5], ctimes(v1412, ctbl[3])); real2 v1569 = reverse(minus(v831, v959)); real2 v1575 = plus(v831, v959); real2 v1607 = plus(v1574, v1575); real2 v1601 = reverse(minus(v1574, v1575)); real2 v1856 = reverse(minus(v1797, v1798)); real2 v1862 = plus(v1797, v1798); real2 v1888 = reverse(minus(v1861, v1862)); real2 v1894 = plus(v1861, v1862); real2 v1147 = ctimesminusplus(reverse(v1139), ctbl[5], ctimes(v1139, ctbl[3])); real2 v1571 = minusplus(v1569, v1570); real2 v1573 = minusplus(uminus(v1569), v1570); real2 v1583 = ctimesminusplus(reverse(v1573), ctbl[4], ctimes(v1573, ctbl[2])); real2 v1858 = minusplus(v1856, v1857); real2 v1860 = minusplus(uminus(v1856), v1857); real2 v1870 = ctimesminusplus(reverse(v1860), ctbl[4], ctimes(v1860, ctbl[2])); real2 v1579 = ctimesminusplus(reverse(v1571), ctbl[5], ctimes(v1571, ctbl[3])); real2 v1802 = ctimesminusplus(reverse(v1794), ctbl[13], ctimes(v1794, ctbl[7])); real2 v1989 = plus(v1738, v1802); real2 v1983 = reverse(minus(v1738, v1802)); real2 v1866 = ctimesminusplus(reverse(v1858), ctbl[5], ctimes(v1858, ctbl[3])); real2 v23 = load(in, 21 << shift); real2 v87 = load(in, 85 << shift); real2 v513 = plus(v23, v87); real2 v509 = minus(v87, v23); real2 v55 = load(in, 53 << shift); real2 v119 = load(in, 117 << shift); real2 v514 = plus(v55, v119); real2 v508 = reverse(minus(v55, v119)); real2 v791 = plus(v513, v514); real2 v785 = reverse(minus(v513, v514)); real2 v512 = minusplus(uminus(v508), v509); real2 v510 = minusplus(v508, v509); real2 v518 = ctimesminusplus(reverse(v510), ctbl[41], ctimes(v510, ctbl[51])); real2 v71 = load(in, 69 << shift); real2 v7 = load(in, 5 << shift); real2 v230 = plus(v7, v71); real2 v226 = minus(v71, v7); 
real2 v103 = load(in, 101 << shift); real2 v39 = load(in, 37 << shift); real2 v225 = reverse(minus(v39, v103)); real2 v231 = plus(v39, v103); real2 v790 = plus(v230, v231); real2 v786 = minus(v231, v230); real2 v1042 = minus(v791, v790); real2 v1046 = plus(v790, v791); real2 v787 = minusplus(v785, v786); real2 v789 = minusplus(uminus(v785), v786); real2 v229 = minusplus(uminus(v225), v226); real2 v227 = minusplus(v225, v226); real2 v237 = ctimesminusplus(reverse(v227), ctbl[39], ctimes(v227, ctbl[53])); real2 v1761 = minus(v518, v237); real2 v1765 = plus(v237, v518); real2 v795 = ctimesminusplus(reverse(v787), ctbl[19], ctimes(v787, ctbl[25])); real2 v799 = ctimesminusplus(reverse(v789), ctbl[29], ctimes(v789, ctbl[15])); real2 v127 = load(in, 125 << shift); real2 v63 = load(in, 61 << shift); real2 v657 = plus(v63, v127); real2 v651 = reverse(minus(v63, v127)); real2 v31 = load(in, 29 << shift); real2 v95 = load(in, 93 << shift); real2 v652 = minus(v95, v31); real2 v656 = plus(v31, v95); real2 v913 = reverse(minus(v656, v657)); real2 v919 = plus(v656, v657); real2 v655 = minusplus(uminus(v651), v652); real2 v653 = minusplus(v651, v652); real2 v661 = ctimesminusplus(reverse(v653), ctbl[45], ctimes(v653, ctbl[47])); real2 v111 = load(in, 109 << shift); real2 v47 = load(in, 45 << shift); real2 v375 = plus(v47, v111); real2 v369 = reverse(minus(v47, v111)); real2 v79 = load(in, 77 << shift); real2 v15 = load(in, 13 << shift); real2 v370 = minus(v79, v15); real2 v374 = plus(v15, v79); real2 v914 = minus(v375, v374); real2 v918 = plus(v374, v375); real2 v371 = minusplus(v369, v370); real2 v373 = minusplus(uminus(v369), v370); real2 v915 = minusplus(v913, v914); real2 v917 = minusplus(uminus(v913), v914); real2 v927 = ctimesminusplus(reverse(v917), ctbl[16], ctimes(v917, ctbl[26])); real2 v381 = ctimesminusplus(reverse(v371), ctbl[43], ctimes(v371, ctbl[49])); real2 v1041 = reverse(minus(v918, v919)); real2 v1047 = plus(v918, v919); real2 v1766 = plus(v381, v661); real2 
v1760 = reverse(minus(v381, v661)); real2 v1762 = minusplus(v1760, v1761); real2 v1764 = minusplus(uminus(v1760), v1761); real2 v1824 = reverse(minus(v1765, v1766)); real2 v1830 = plus(v1765, v1766); real2 v923 = ctimesminusplus(reverse(v915), ctbl[21], ctimes(v915, ctbl[23])); real2 v1378 = reverse(minus(v795, v923)); real2 v1384 = plus(v795, v923); real2 v1045 = minusplus(uminus(v1041), v1042); real2 v1043 = minusplus(v1041, v1042); real2 v1051 = ctimesminusplus(reverse(v1043), ctbl[9], ctimes(v1043, ctbl[11])); real2 v1537 = reverse(minus(v799, v927)); real2 v1543 = plus(v799, v927); real2 v1055 = ctimesminusplus(reverse(v1045), ctbl[7], ctimes(v1045, ctbl[6])); real2 v1111 = plus(v1046, v1047); real2 v1105 = reverse(minus(v1046, v1047)); real2 v115 = load(in, 113 << shift); real2 v51 = load(in, 49 << shift); real2 v440 = reverse(minus(v51, v115)); real2 v446 = plus(v51, v115); real2 v19 = load(in, 17 << shift); real2 v83 = load(in, 81 << shift); real2 v441 = minus(v83, v19); real2 v445 = plus(v19, v83); real2 v727 = plus(v445, v446); real2 v721 = reverse(minus(v445, v446)); real2 v442 = minusplus(v440, v441); real2 v444 = minusplus(uminus(v440), v441); real2 v450 = ctimesminusplus(reverse(v442), ctbl[33], ctimes(v442, ctbl[59])); real2 v67 = load(in, 65 << shift); real2 v3 = load(in, 1 << shift); real2 v148 = minus(v67, v3); real2 v152 = plus(v3, v67); real2 v99 = load(in, 97 << shift); real2 v35 = load(in, 33 << shift); real2 v147 = reverse(minus(v35, v99)); real2 v153 = plus(v35, v99); real2 v726 = plus(v152, v153); real2 v722 = minus(v153, v152); real2 v723 = minusplus(v721, v722); real2 v725 = minusplus(uminus(v721), v722); real2 v731 = ctimesminusplus(reverse(v723), ctbl[15], ctimes(v723, ctbl[29])); real2 v735 = ctimesminusplus(reverse(v725), ctbl[23], ctimes(v725, ctbl[21])); real2 v149 = minusplus(v147, v148); real2 v151 = minusplus(uminus(v147), v148); real2 v978 = minus(v727, v726); real2 v982 = plus(v726, v727); real2 v159 = 
ctimesminusplus(reverse(v149), ctbl[31], ctimes(v149, ctbl[61])); real2 v1701 = plus(v159, v450); real2 v1697 = minus(v450, v159); real2 v91 = load(in, 89 << shift); real2 v27 = load(in, 25 << shift); real2 v584 = plus(v27, v91); real2 v580 = minus(v91, v27); real2 v59 = load(in, 57 << shift); real2 v123 = load(in, 121 << shift); real2 v579 = reverse(minus(v59, v123)); real2 v585 = plus(v59, v123); real2 v583 = minusplus(uminus(v579), v580); real2 v581 = minusplus(v579, v580); real2 v589 = ctimesminusplus(reverse(v581), ctbl[37], ctimes(v581, ctbl[55])); real2 v855 = plus(v584, v585); real2 v849 = reverse(minus(v584, v585)); real2 v11 = load(in, 9 << shift); real2 v75 = load(in, 73 << shift); real2 v302 = minus(v75, v11); real2 v306 = plus(v11, v75); real2 v107 = load(in, 105 << shift); real2 v43 = load(in, 41 << shift); real2 v307 = plus(v43, v107); real2 v301 = reverse(minus(v43, v107)); real2 v854 = plus(v306, v307); real2 v850 = minus(v307, v306); real2 v851 = minusplus(v849, v850); real2 v853 = minusplus(uminus(v849), v850); real2 v863 = ctimesminusplus(reverse(v853), ctbl[19], ctimes(v853, ctbl[18])); real2 v305 = minusplus(uminus(v301), v302); real2 v303 = minusplus(v301, v302); real2 v1538 = minus(v863, v735); real2 v1542 = plus(v735, v863); real2 v859 = ctimesminusplus(reverse(v851), ctbl[17], ctimes(v851, ctbl[27])); real2 v1379 = minus(v859, v731); real2 v1383 = plus(v731, v859); real2 v1443 = minus(v1384, v1383); real2 v1447 = plus(v1383, v1384); real2 v1446 = minusplus(uminus(v1442), v1443); real2 v1444 = minusplus(v1442, v1443); real2 v983 = plus(v854, v855); real2 v977 = reverse(minus(v854, v855)); real2 v979 = minusplus(v977, v978); real2 v981 = minusplus(uminus(v977), v978); real2 v1456 = ctimesminusplus(reverse(v1446), ctbl[1], ctimes(v1446, ctbl[0])); real2 v311 = ctimesminusplus(reverse(v303), ctbl[35], ctimes(v303, ctbl[57])); real2 v1696 = reverse(minus(v311, v589)); real2 v1702 = plus(v311, v589); real2 v1452 = ctimesminusplus(reverse(v1444), 
ctbl[1], ctimes(v1444, ctbl[1])); real2 v987 = ctimesminusplus(reverse(v979), ctbl[7], ctimes(v979, ctbl[13])); real2 v1265 = minus(v1051, v987); real2 v1269 = plus(v987, v1051); real2 v1266 = minusplus(v1264, v1265); real2 v1268 = minusplus(uminus(v1264), v1265); real2 v1278 = ctimesminusplus(reverse(v1268), ctbl[1], ctimes(v1268, ctbl[0])); real2 v1286 = plus(v1269, v1270); real2 v1280 = reverse(minus(v1269, v1270)); real2 v1110 = plus(v982, v983); real2 v1106 = minus(v983, v982); real2 v1174 = plus(v1110, v1111); real2 v1170 = minus(v1111, v1110); real2 v1185 = reverse(minus(v1174, v1175)); real2 v1191 = plus(v1174, v1175); real2 v1171 = minusplus(v1169, v1170); real2 v1173 = minusplus(uminus(v1169), v1170); real2 v1179 = ctimesminusplus(reverse(v1171), ctbl[1], ctimes(v1171, ctbl[1])); real2 v1183 = ctimesminusplus(reverse(v1173), ctbl[1], ctimes(v1173, ctbl[0])); real2 v991 = ctimesminusplus(reverse(v981), ctbl[11], ctimes(v981, ctbl[9])); real2 v1322 = minus(v1055, v991); real2 v1326 = plus(v991, v1055); real2 v1337 = reverse(minus(v1326, v1327)); real2 v1343 = plus(v1326, v1327); real2 v1323 = minusplus(v1321, v1322); real2 v1325 = minusplus(uminus(v1321), v1322); real2 v1335 = ctimesminusplus(reverse(v1325), ctbl[1], ctimes(v1325, ctbl[0])); real2 v1109 = minusplus(uminus(v1105), v1106); real2 v1107 = minusplus(v1105, v1106); real2 v1115 = ctimesminusplus(reverse(v1107), ctbl[3], ctimes(v1107, ctbl[5])); real2 v1274 = ctimesminusplus(reverse(v1266), ctbl[1], ctimes(v1266, ctbl[1])); real2 v1606 = plus(v1542, v1543); real2 v1602 = minus(v1543, v1542); real2 v1216 = plus(v1115, v1147); real2 v1210 = reverse(minus(v1115, v1147)); real2 v1331 = ctimesminusplus(reverse(v1323), ctbl[1], ctimes(v1323, ctbl[1])); real2 v1119 = ctimesminusplus(reverse(v1109), ctbl[5], ctimes(v1109, ctbl[3])); real2 v1464 = plus(v1447, v1448); real2 v1458 = reverse(minus(v1447, v1448)); real2 v1382 = minusplus(uminus(v1378), v1379); real2 v1380 = minusplus(v1378, v1379); real2 v1388 
= ctimesminusplus(reverse(v1380), ctbl[3], ctimes(v1380, ctbl[5])); real2 v1392 = ctimesminusplus(reverse(v1382), ctbl[5], ctimes(v1382, ctbl[3])); real2 v1508 = plus(v1392, v1424); real2 v1502 = reverse(minus(v1392, v1424)); real2 v1489 = plus(v1388, v1420); real2 v1483 = reverse(minus(v1388, v1420)); real2 v1603 = minusplus(v1601, v1602); real2 v1605 = minusplus(uminus(v1601), v1602); real2 v1615 = ctimesminusplus(reverse(v1605), ctbl[1], ctimes(v1605, ctbl[0])); real2 v1611 = ctimesminusplus(reverse(v1603), ctbl[1], ctimes(v1603, ctbl[1])); real2 v1617 = reverse(minus(v1606, v1607)); real2 v1623 = plus(v1606, v1607); real2 v1541 = minusplus(uminus(v1537), v1538); real2 v1539 = minusplus(v1537, v1538); real2 v1547 = ctimesminusplus(reverse(v1539), ctbl[3], ctimes(v1539, ctbl[5])); real2 v1551 = ctimesminusplus(reverse(v1541), ctbl[5], ctimes(v1541, ctbl[3])); real2 v1667 = plus(v1551, v1583); real2 v1661 = reverse(minus(v1551, v1583)); real2 v1648 = plus(v1547, v1579); real2 v1642 = reverse(minus(v1547, v1579)); real2 v1229 = reverse(minus(v1119, v1151)); real2 v1235 = plus(v1119, v1151); real2 v76 = load(in, 74 << shift); real2 v12 = load(in, 10 << shift); real2 v322 = plus(v12, v76); real2 v318 = minus(v76, v12); real2 v44 = load(in, 42 << shift); real2 v108 = load(in, 106 << shift); real2 v323 = plus(v44, v108); real2 v317 = reverse(minus(v44, v108)); real2 v866 = minus(v323, v322); real2 v870 = plus(v322, v323); real2 v92 = load(in, 90 << shift); real2 v28 = load(in, 26 << shift); real2 v602 = plus(v28, v92); real2 v598 = minus(v92, v28); real2 v60 = load(in, 58 << shift); real2 v124 = load(in, 122 << shift); real2 v603 = plus(v60, v124); real2 v597 = reverse(minus(v60, v124)); real2 v865 = reverse(minus(v602, v603)); real2 v871 = plus(v602, v603); real2 v869 = minusplus(uminus(v865), v866); real2 v867 = minusplus(v865, v866); real2 v879 = ctimesminusplus(reverse(v869), ctbl[7], ctimes(v869, ctbl[6])); real2 v993 = reverse(minus(v870, v871)); real2 v999 = 
plus(v870, v871); real2 v875 = ctimesminusplus(reverse(v867), ctbl[9], ctimes(v867, ctbl[11])); real2 v100 = load(in, 98 << shift); real2 v36 = load(in, 34 << shift); real2 v167 = reverse(minus(v36, v100)); real2 v173 = plus(v36, v100); real2 v4 = load(in, 2 << shift); real2 v68 = load(in, 66 << shift); real2 v168 = minus(v68, v4); real2 v172 = plus(v4, v68); real2 v742 = plus(v172, v173); real2 v738 = minus(v173, v172); real2 v84 = load(in, 82 << shift); real2 v20 = load(in, 18 << shift); real2 v462 = plus(v20, v84); real2 v458 = minus(v84, v20); real2 v116 = load(in, 114 << shift); real2 v52 = load(in, 50 << shift); real2 v463 = plus(v52, v116); real2 v457 = reverse(minus(v52, v116)); real2 v737 = reverse(minus(v462, v463)); real2 v743 = plus(v462, v463); real2 v998 = plus(v742, v743); real2 v994 = minus(v743, v742); real2 v739 = minusplus(v737, v738); real2 v741 = minusplus(uminus(v737), v738); real2 v995 = minusplus(v993, v994); real2 v997 = minusplus(uminus(v993), v994); real2 v1007 = ctimesminusplus(reverse(v997), ctbl[5], ctimes(v997, ctbl[3])); real2 v747 = ctimesminusplus(reverse(v739), ctbl[7], ctimes(v739, ctbl[13])); real2 v1395 = minus(v875, v747); real2 v1399 = plus(v747, v875); real2 v1003 = ctimesminusplus(reverse(v995), ctbl[3], ctimes(v995, ctbl[5])); real2 v1122 = minus(v999, v998); real2 v1126 = plus(v998, v999); real2 v72 = load(in, 70 << shift); real2 v8 = load(in, 6 << shift); real2 v246 = minus(v72, v8); real2 v250 = plus(v8, v72); real2 v104 = load(in, 102 << shift); real2 v40 = load(in, 38 << shift); real2 v245 = reverse(minus(v40, v104)); real2 v251 = plus(v40, v104); real2 v802 = minus(v251, v250); real2 v806 = plus(v250, v251); real2 v24 = load(in, 22 << shift); real2 v88 = load(in, 86 << shift); real2 v530 = plus(v24, v88); real2 v526 = minus(v88, v24); real2 v120 = load(in, 118 << shift); real2 v56 = load(in, 54 << shift); real2 v531 = plus(v56, v120); real2 v525 = reverse(minus(v56, v120)); real2 v801 = reverse(minus(v530, v531)); 
real2 v807 = plus(v530, v531); real2 v1058 = minus(v807, v806); real2 v1062 = plus(v806, v807); real2 v803 = minusplus(v801, v802); real2 v805 = minusplus(uminus(v801), v802); real2 v811 = ctimesminusplus(reverse(v803), ctbl[11], ctimes(v803, ctbl[9])); real2 v128 = load(in, 126 << shift); real2 v64 = load(in, 62 << shift); real2 v669 = reverse(minus(v64, v128)); real2 v675 = plus(v64, v128); real2 v32 = load(in, 30 << shift); real2 v96 = load(in, 94 << shift); real2 v674 = plus(v32, v96); real2 v670 = minus(v96, v32); real2 v935 = plus(v674, v675); real2 v929 = reverse(minus(v674, v675)); real2 v80 = load(in, 78 << shift); real2 v16 = load(in, 14 << shift); real2 v389 = minus(v80, v16); real2 v393 = plus(v16, v80); real2 v112 = load(in, 110 << shift); real2 v48 = load(in, 46 << shift); real2 v394 = plus(v48, v112); real2 v388 = reverse(minus(v48, v112)); real2 v930 = minus(v394, v393); real2 v934 = plus(v393, v394); real2 v1063 = plus(v934, v935); real2 v1057 = reverse(minus(v934, v935)); real2 v1059 = minusplus(v1057, v1058); real2 v1061 = minusplus(uminus(v1057), v1058); real2 v1127 = plus(v1062, v1063); real2 v1121 = reverse(minus(v1062, v1063)); real2 v1123 = minusplus(v1121, v1122); real2 v1125 = minusplus(uminus(v1121), v1122); real2 v1135 = ctimesminusplus(reverse(v1125), ctbl[1], ctimes(v1125, ctbl[0])); real2 v1071 = ctimesminusplus(reverse(v1061), ctbl[4], ctimes(v1061, ctbl[2])); real2 v1311 = plus(v1007, v1071); real2 v1305 = reverse(minus(v1007, v1071)); real2 v1131 = ctimesminusplus(reverse(v1123), ctbl[1], ctimes(v1123, ctbl[1])); real2 v1153 = reverse(minus(v1126, v1127)); real2 v1159 = plus(v1126, v1127); real2 v1067 = ctimesminusplus(reverse(v1059), ctbl[5], ctimes(v1059, ctbl[3])); real2 v1248 = reverse(minus(v1003, v1067)); real2 v1254 = plus(v1003, v1067); real2 v94 = load(in, 92 << shift); real2 v30 = load(in, 28 << shift); real2 v634 = minus(v94, v30); real2 v638 = plus(v30, v94); real2 v126 = load(in, 124 << shift); real2 v62 = load(in, 60 
<< shift); real2 v633 = reverse(minus(v62, v126)); real2 v639 = plus(v62, v126); real2 v897 = reverse(minus(v638, v639)); real2 v903 = plus(v638, v639); real2 v42 = load(in, 40 << shift); real2 v106 = load(in, 104 << shift); real2 v283 = reverse(minus(v42, v106)); real2 v289 = plus(v42, v106); real2 v10 = load(in, 8 << shift); real2 v74 = load(in, 72 << shift); real2 v284 = minus(v74, v10); real2 v288 = plus(v10, v74); real2 v838 = plus(v288, v289); real2 v834 = minus(v289, v288); real2 v26 = load(in, 24 << shift); real2 v90 = load(in, 88 << shift); real2 v562 = minus(v90, v26); real2 v566 = plus(v26, v90); real2 v122 = load(in, 120 << shift); real2 v58 = load(in, 56 << shift); real2 v567 = plus(v58, v122); real2 v561 = reverse(minus(v58, v122)); real2 v833 = reverse(minus(v566, v567)); real2 v839 = plus(v566, v567); real2 v967 = plus(v838, v839); real2 v961 = reverse(minus(v838, v839)); real2 v14 = load(in, 12 << shift); real2 v78 = load(in, 76 << shift); real2 v353 = minus(v78, v14); real2 v357 = plus(v14, v78); real2 v46 = load(in, 44 << shift); real2 v110 = load(in, 108 << shift); real2 v358 = plus(v46, v110); real2 v352 = reverse(minus(v46, v110)); real2 v898 = minus(v358, v357); real2 v902 = plus(v357, v358); real2 v1025 = reverse(minus(v902, v903)); real2 v1031 = plus(v902, v903); real2 v114 = load(in, 112 << shift); real2 v50 = load(in, 48 << shift); real2 v422 = reverse(minus(v50, v114)); real2 v428 = plus(v50, v114); real2 v2 = load(in, 0 << shift); real2 v66 = load(in, 64 << shift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << shift); real2 v34 = load(in, 32 << shift); real2 v137 = plus(v34, v98); real2 v131 = reverse(minus(v34, v98)); real2 v706 = minus(v137, v136); real2 v710 = plus(v136, v137); real2 v18 = load(in, 16 << shift); real2 v82 = load(in, 80 << shift); real2 v427 = plus(v18, v82); real2 v423 = minus(v82, v18); real2 v705 = reverse(minus(v427, v428)); real2 v711 = plus(v427, v428); real2 v966 = 
plus(v710, v711); real2 v962 = minus(v711, v710); real2 v1090 = minus(v967, v966); real2 v1094 = plus(v966, v967); real2 v70 = load(in, 68 << shift); real2 v6 = load(in, 4 << shift); real2 v210 = plus(v6, v70); real2 v206 = minus(v70, v6); real2 v38 = load(in, 36 << shift); real2 v102 = load(in, 100 << shift); real2 v211 = plus(v38, v102); real2 v205 = reverse(minus(v38, v102)); real2 v774 = plus(v210, v211); real2 v770 = minus(v211, v210); real2 v22 = load(in, 20 << shift); real2 v86 = load(in, 84 << shift); real2 v492 = minus(v86, v22); real2 v496 = plus(v22, v86); real2 v118 = load(in, 116 << shift); real2 v54 = load(in, 52 << shift); real2 v497 = plus(v54, v118); real2 v491 = reverse(minus(v54, v118)); real2 v775 = plus(v496, v497); real2 v769 = reverse(minus(v496, v497)); real2 v1030 = plus(v774, v775); real2 v1026 = minus(v775, v774); real2 v1095 = plus(v1030, v1031); real2 v1089 = reverse(minus(v1030, v1031)); real2 v1103 = minus(uminusplus(v1089), v1090); real2 v1099 = minus(uplusminus(v1089), v1090); real2 v1230 = minus(v1135, v1103); store(out, 56 << shift, minus(uplusminus(v1229), v1230)); store(out, 120 << shift, minus(uminusplus(v1229), v1230)); real2 v1234 = plus(v1103, v1135); store(out, 24 << shift, plus(v1234, v1235)); store(out, 88 << shift, minus(v1234, v1235)); real2 v1211 = minus(v1131, v1099); real2 v1215 = plus(v1099, v1131); store(out, 8 << shift, plus(v1215, v1216)); store(out, 72 << shift, minus(v1215, v1216)); store(out, 40 << shift, minus(uplusminus(v1210), v1211)); store(out, 104 << shift, minus(uminusplus(v1210), v1211)); real2 v1158 = plus(v1094, v1095); real2 v1154 = minus(v1095, v1094); real2 v1186 = minus(v1159, v1158); store(out, 32 << shift, minus(uplusminus(v1185), v1186)); store(out, 96 << shift, minus(uminusplus(v1185), v1186)); real2 v1190 = plus(v1158, v1159); store(out, 64 << shift, minus(v1190, v1191)); store(out, 0 << shift, plus(v1190, v1191)); real2 v1163 = minus(uplusminus(v1153), v1154); store(out, 16 << shift, 
plus(v1163, v1179)); store(out, 80 << shift, minus(v1163, v1179)); real2 v1167 = minus(uminusplus(v1153), v1154); store(out, 112 << shift, minus(v1167, v1183)); store(out, 48 << shift, plus(v1167, v1183)); real2 v971 = minus(uplusminus(v961), v962); real2 v975 = minus(uminusplus(v961), v962); real2 v1027 = minusplus(v1025, v1026); real2 v1029 = minusplus(uminus(v1025), v1026); real2 v1039 = ctimesminusplus(reverse(v1029), ctbl[1], ctimes(v1029, ctbl[0])); real2 v1306 = minus(v1039, v975); real2 v1310 = plus(v975, v1039); real2 v1319 = minus(uminusplus(v1305), v1306); real2 v1315 = minus(uplusminus(v1305), v1306); store(out, 124 << shift, minus(v1319, v1335)); store(out, 60 << shift, plus(v1319, v1335)); store(out, 28 << shift, plus(v1315, v1331)); store(out, 92 << shift, minus(v1315, v1331)); real2 v1342 = plus(v1310, v1311); store(out, 76 << shift, minus(v1342, v1343)); store(out, 12 << shift, plus(v1342, v1343)); real2 v1338 = minus(v1311, v1310); store(out, 44 << shift, minus(uplusminus(v1337), v1338)); store(out, 108 << shift, minus(uminusplus(v1337), v1338)); real2 v1035 = ctimesminusplus(reverse(v1027), ctbl[1], ctimes(v1027, ctbl[1])); real2 v1249 = minus(v1035, v971); real2 v1253 = plus(v971, v1035); real2 v1262 = minus(uminusplus(v1248), v1249); real2 v1258 = minus(uplusminus(v1248), v1249); store(out, 84 << shift, minus(v1258, v1274)); store(out, 20 << shift, plus(v1258, v1274)); store(out, 52 << shift, plus(v1262, v1278)); store(out, 116 << shift, minus(v1262, v1278)); real2 v1281 = minus(v1254, v1253); real2 v1285 = plus(v1253, v1254); store(out, 68 << shift, minus(v1285, v1286)); store(out, 4 << shift, plus(v1285, v1286)); store(out, 100 << shift, minus(uminusplus(v1280), v1281)); store(out, 36 << shift, minus(uplusminus(v1280), v1281)); real2 v835 = minusplus(v833, v834); real2 v837 = minusplus(uminus(v833), v834); real2 v843 = ctimesminusplus(reverse(v835), ctbl[1], ctimes(v835, ctbl[1])); real2 v773 = minusplus(uminus(v769), v770); real2 v771 = 
minusplus(v769, v770); real2 v779 = ctimesminusplus(reverse(v771), ctbl[3], ctimes(v771, ctbl[5])); real2 v901 = minusplus(uminus(v897), v898); real2 v899 = minusplus(v897, v898); real2 v907 = ctimesminusplus(reverse(v899), ctbl[5], ctimes(v899, ctbl[3])); real2 v719 = minus(uminusplus(v705), v706); real2 v715 = minus(uplusminus(v705), v706); real2 v933 = minusplus(uminus(v929), v930); real2 v931 = minusplus(v929, v930); real2 v939 = ctimesminusplus(reverse(v931), ctbl[13], ctimes(v931, ctbl[7])); real2 v1394 = reverse(minus(v811, v939)); real2 v1400 = plus(v811, v939); real2 v1426 = reverse(minus(v1399, v1400)); real2 v1432 = plus(v1399, v1400); real2 v1367 = plus(v715, v843); real2 v1363 = minus(v843, v715); real2 v1368 = plus(v779, v907); real2 v1362 = reverse(minus(v779, v907)); real2 v1427 = minus(v1368, v1367); real2 v1431 = plus(v1367, v1368); real2 v1440 = minus(uminusplus(v1426), v1427); real2 v1436 = minus(uplusminus(v1426), v1427); store(out, 18 << shift, plus(v1436, v1452)); store(out, 82 << shift, minus(v1436, v1452)); store(out, 114 << shift, minus(v1440, v1456)); store(out, 50 << shift, plus(v1440, v1456)); real2 v1459 = minus(v1432, v1431); store(out, 98 << shift, minus(uminusplus(v1458), v1459)); store(out, 34 << shift, minus(uplusminus(v1458), v1459)); real2 v1463 = plus(v1431, v1432); store(out, 2 << shift, plus(v1463, v1464)); store(out, 66 << shift, minus(v1463, v1464)); real2 v1372 = minus(uplusminus(v1362), v1363); real2 v1376 = minus(uminusplus(v1362), v1363); real2 v1398 = minusplus(uminus(v1394), v1395); real2 v1396 = minusplus(v1394, v1395); real2 v1404 = ctimesminusplus(reverse(v1396), ctbl[1], ctimes(v1396, ctbl[1])); real2 v1484 = minus(v1404, v1372); store(out, 106 << shift, minus(uminusplus(v1483), v1484)); store(out, 42 << shift, minus(uplusminus(v1483), v1484)); real2 v1488 = plus(v1372, v1404); store(out, 10 << shift, plus(v1488, v1489)); store(out, 74 << shift, minus(v1488, v1489)); real2 v1408 = ctimesminusplus(reverse(v1398), 
ctbl[1], ctimes(v1398, ctbl[0])); real2 v1503 = minus(v1408, v1376); store(out, 122 << shift, minus(uminusplus(v1502), v1503)); store(out, 58 << shift, minus(uplusminus(v1502), v1503)); real2 v1507 = plus(v1376, v1408); store(out, 90 << shift, minus(v1507, v1508)); store(out, 26 << shift, plus(v1507, v1508)); real2 v847 = ctimesminusplus(reverse(v837), ctbl[1], ctimes(v837, ctbl[0])); real2 v911 = ctimesminusplus(reverse(v901), ctbl[4], ctimes(v901, ctbl[2])); real2 v815 = ctimesminusplus(reverse(v805), ctbl[13], ctimes(v805, ctbl[12])); real2 v1522 = minus(v847, v719); real2 v1526 = plus(v719, v847); real2 v751 = ctimesminusplus(reverse(v741), ctbl[11], ctimes(v741, ctbl[9])); real2 v1554 = minus(v879, v751); real2 v1558 = plus(v751, v879); real2 v943 = ctimesminusplus(reverse(v933), ctbl[10], ctimes(v933, ctbl[8])); real2 v1553 = reverse(minus(v815, v943)); real2 v1559 = plus(v815, v943); real2 v1591 = plus(v1558, v1559); real2 v1585 = reverse(minus(v1558, v1559)); real2 v783 = ctimesminusplus(reverse(v773), ctbl[5], ctimes(v773, ctbl[3])); real2 v1521 = reverse(minus(v783, v911)); real2 v1527 = plus(v783, v911); real2 v1586 = minus(v1527, v1526); real2 v1590 = plus(v1526, v1527); real2 v1595 = minus(uplusminus(v1585), v1586); store(out, 22 << shift, plus(v1595, v1611)); store(out, 86 << shift, minus(v1595, v1611)); real2 v1599 = minus(uminusplus(v1585), v1586); store(out, 118 << shift, minus(v1599, v1615)); store(out, 54 << shift, plus(v1599, v1615)); real2 v1622 = plus(v1590, v1591); store(out, 70 << shift, minus(v1622, v1623)); store(out, 6 << shift, plus(v1622, v1623)); real2 v1618 = minus(v1591, v1590); store(out, 102 << shift, minus(uminusplus(v1617), v1618)); store(out, 38 << shift, minus(uplusminus(v1617), v1618)); real2 v1557 = minusplus(uminus(v1553), v1554); real2 v1555 = minusplus(v1553, v1554); real2 v1563 = ctimesminusplus(reverse(v1555), ctbl[1], ctimes(v1555, ctbl[1])); real2 v1531 = minus(uplusminus(v1521), v1522); real2 v1535 = 
minus(uminusplus(v1521), v1522); real2 v1643 = minus(v1563, v1531); store(out, 46 << shift, minus(uplusminus(v1642), v1643)); store(out, 110 << shift, minus(uminusplus(v1642), v1643)); real2 v1647 = plus(v1531, v1563); store(out, 78 << shift, minus(v1647, v1648)); store(out, 14 << shift, plus(v1647, v1648)); real2 v1567 = ctimesminusplus(reverse(v1557), ctbl[1], ctimes(v1557, ctbl[0])); real2 v1666 = plus(v1535, v1567); real2 v1662 = minus(v1567, v1535); store(out, 94 << shift, minus(v1666, v1667)); store(out, 30 << shift, plus(v1666, v1667)); store(out, 126 << shift, minus(uminusplus(v1661), v1662)); store(out, 62 << shift, minus(uplusminus(v1661), v1662)); real2 v426 = minusplus(uminus(v422), v423); real2 v424 = minusplus(v422, v423); real2 v433 = ctimesminusplus(reverse(v424), ctbl[1], ctimes(v424, ctbl[1])); real2 v141 = minus(uplusminus(v131), v132); real2 v145 = minus(uminusplus(v131), v132); real2 v1685 = plus(v141, v433); real2 v1681 = minus(v433, v141); real2 v247 = minusplus(v245, v246); real2 v249 = minusplus(uminus(v245), v246); real2 v207 = minusplus(v205, v206); real2 v209 = minusplus(uminus(v205), v206); real2 v217 = ctimesminusplus(reverse(v207), ctbl[7], ctimes(v207, ctbl[13])); real2 v321 = minusplus(uminus(v317), v318); real2 v319 = minusplus(v317, v318); real2 v565 = minusplus(uminus(v561), v562); real2 v563 = minusplus(v561, v562); real2 v285 = minusplus(v283, v284); real2 v287 = minusplus(uminus(v283), v284); real2 v295 = ctimesminusplus(reverse(v285), ctbl[3], ctimes(v285, ctbl[5])); real2 v329 = ctimesminusplus(reverse(v319), ctbl[19], ctimes(v319, ctbl[25])); real2 v571 = ctimesminusplus(reverse(v563), ctbl[5], ctimes(v563, ctbl[3])); real2 v1680 = reverse(minus(v295, v571)); real2 v1686 = plus(v295, v571); real2 v601 = minusplus(uminus(v597), v598); real2 v599 = minusplus(v597, v598); real2 v607 = ctimesminusplus(reverse(v599), ctbl[21], ctimes(v599, ctbl[23])); real2 v1718 = plus(v329, v607); real2 v1712 = reverse(minus(v329, v607)); 
real2 v527 = minusplus(v525, v526); real2 v529 = minusplus(uminus(v525), v526); real2 v461 = minusplus(uminus(v457), v458); real2 v459 = minusplus(v457, v458); real2 v467 = ctimesminusplus(reverse(v459), ctbl[17], ctimes(v459, ctbl[27])); real2 v255 = ctimesminusplus(reverse(v247), ctbl[23], ctimes(v247, ctbl[21])); real2 v637 = minusplus(uminus(v633), v634); real2 v635 = minusplus(v633, v634); real2 v643 = ctimesminusplus(reverse(v635), ctbl[13], ctimes(v635, ctbl[7])); real2 v1813 = plus(v1685, v1686); real2 v1809 = minus(v1686, v1685); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v171 = minusplus(uminus(v167), v168); real2 v169 = minusplus(v167, v168); real2 v354 = minusplus(v352, v353); real2 v356 = minusplus(uminus(v352), v353); real2 v362 = ctimesminusplus(reverse(v354), ctbl[11], ctimes(v354, ctbl[9])); real2 v179 = ctimesminusplus(reverse(v169), ctbl[15], ctimes(v169, ctbl[29])); real2 v1717 = plus(v179, v467); real2 v1713 = minus(v467, v179); real2 v1841 = minus(v1718, v1717); real2 v1845 = plus(v1717, v1718); real2 v501 = ctimesminusplus(reverse(v493), ctbl[9], ctimes(v493, ctbl[11])); real2 v1745 = minus(v501, v217); real2 v1749 = plus(v217, v501); real2 v671 = minusplus(v669, v670); real2 v673 = minusplus(uminus(v669), v670); real2 v679 = ctimesminusplus(reverse(v671), ctbl[29], ctimes(v671, ctbl[15])); real2 v535 = ctimesminusplus(reverse(v527), ctbl[25], ctimes(v527, ctbl[19])); real2 v1781 = plus(v255, v535); real2 v1777 = minus(v535, v255); real2 v1825 = minus(v1702, v1701); real2 v1829 = plus(v1701, v1702); real2 v1889 = minus(v1830, v1829); real2 v1893 = plus(v1829, v1830); real2 v1910 = plus(v1893, v1894); real2 v1904 = reverse(minus(v1893, v1894)); real2 v392 = minusplus(uminus(v388), v389); real2 v390 = minusplus(v388, v389); real2 v398 = ctimesminusplus(reverse(v390), ctbl[27], ctimes(v390, ctbl[17])); real2 v1776 = reverse(minus(v398, v679)); real2 v1782 = plus(v398, v679); real2 v1744 = 
reverse(minus(v362, v643)); real2 v1750 = plus(v362, v643); real2 v1808 = reverse(minus(v1749, v1750)); real2 v1814 = plus(v1749, v1750); real2 v1873 = minus(v1814, v1813); real2 v1877 = plus(v1813, v1814); real2 v1846 = plus(v1781, v1782); real2 v1840 = reverse(minus(v1781, v1782)); real2 v1872 = reverse(minus(v1845, v1846)); real2 v1878 = plus(v1845, v1846); real2 v1909 = plus(v1877, v1878); store(out, 1 << shift, plus(v1909, v1910)); store(out, 65 << shift, minus(v1909, v1910)); real2 v1905 = minus(v1878, v1877); store(out, 33 << shift, minus(uplusminus(v1904), v1905)); store(out, 97 << shift, minus(uminusplus(v1904), v1905)); real2 v1822 = minus(uminusplus(v1808), v1809); real2 v1818 = minus(uplusminus(v1808), v1809); real2 v1826 = minusplus(v1824, v1825); real2 v1828 = minusplus(uminus(v1824), v1825); real2 v1838 = ctimesminusplus(reverse(v1828), ctbl[5], ctimes(v1828, ctbl[3])); real2 v1948 = reverse(minus(v1838, v1870)); real2 v1954 = plus(v1838, v1870); real2 v1844 = minusplus(uminus(v1840), v1841); real2 v1842 = minusplus(v1840, v1841); real2 v1854 = ctimesminusplus(reverse(v1844), ctbl[1], ctimes(v1844, ctbl[0])); real2 v1953 = plus(v1822, v1854); real2 v1949 = minus(v1854, v1822); store(out, 89 << shift, minus(v1953, v1954)); store(out, 25 << shift, plus(v1953, v1954)); store(out, 121 << shift, minus(uminusplus(v1948), v1949)); store(out, 57 << shift, minus(uplusminus(v1948), v1949)); real2 v1834 = ctimesminusplus(reverse(v1826), ctbl[3], ctimes(v1826, ctbl[5])); real2 v1850 = ctimesminusplus(reverse(v1842), ctbl[1], ctimes(v1842, ctbl[1])); real2 v1929 = reverse(minus(v1834, v1866)); real2 v1935 = plus(v1834, v1866); real2 v1934 = plus(v1818, v1850); real2 v1930 = minus(v1850, v1818); store(out, 105 << shift, minus(uminusplus(v1929), v1930)); store(out, 41 << shift, minus(uplusminus(v1929), v1930)); store(out, 73 << shift, minus(v1934, v1935)); store(out, 9 << shift, plus(v1934, v1935)); real2 v1890 = minusplus(v1888, v1889); real2 v1892 = 
minusplus(uminus(v1888), v1889); real2 v1902 = ctimesminusplus(reverse(v1892), ctbl[1], ctimes(v1892, ctbl[0])); real2 v1886 = minus(uminusplus(v1872), v1873); store(out, 113 << shift, minus(v1886, v1902)); store(out, 49 << shift, plus(v1886, v1902)); real2 v1882 = minus(uplusminus(v1872), v1873); real2 v1898 = ctimesminusplus(reverse(v1890), ctbl[1], ctimes(v1890, ctbl[1])); store(out, 17 << shift, plus(v1882, v1898)); store(out, 81 << shift, minus(v1882, v1898)); real2 v1700 = minusplus(uminus(v1696), v1697); real2 v1698 = minusplus(v1696, v1697); real2 v1690 = minus(uplusminus(v1680), v1681); real2 v1694 = minus(uminusplus(v1680), v1681); real2 v1778 = minusplus(v1776, v1777); real2 v1780 = minusplus(uminus(v1776), v1777); real2 v1774 = ctimesminusplus(reverse(v1764), ctbl[7], ctimes(v1764, ctbl[6])); real2 v1710 = ctimesminusplus(reverse(v1700), ctbl[11], ctimes(v1700, ctbl[9])); real2 v2041 = minus(v1774, v1710); real2 v2045 = plus(v1710, v1774); real2 v1714 = minusplus(v1712, v1713); real2 v1716 = minusplus(uminus(v1712), v1713); real2 v2042 = minusplus(v2040, v2041); real2 v2044 = minusplus(uminus(v2040), v2041); real2 v2054 = ctimesminusplus(reverse(v2044), ctbl[1], ctimes(v2044, ctbl[0])); real2 v1726 = ctimesminusplus(reverse(v1716), ctbl[5], ctimes(v1716, ctbl[3])); real2 v1748 = minusplus(uminus(v1744), v1745); real2 v1746 = minusplus(v1744, v1745); real2 v1758 = ctimesminusplus(reverse(v1748), ctbl[1], ctimes(v1748, ctbl[0])); real2 v2029 = plus(v1694, v1758); real2 v2025 = minus(v1758, v1694); real2 v1790 = ctimesminusplus(reverse(v1780), ctbl[4], ctimes(v1780, ctbl[2])); real2 v2024 = reverse(minus(v1726, v1790)); real2 v2030 = plus(v1726, v1790); real2 v2038 = minus(uminusplus(v2024), v2025); store(out, 61 << shift, plus(v2038, v2054)); store(out, 125 << shift, minus(v2038, v2054)); real2 v2034 = minus(uplusminus(v2024), v2025); real2 v2050 = ctimesminusplus(reverse(v2042), ctbl[1], ctimes(v2042, ctbl[1])); store(out, 93 << shift, minus(v2034, 
v2050)); store(out, 29 << shift, plus(v2034, v2050)); real2 v2056 = reverse(minus(v2045, v2046)); real2 v2062 = plus(v2045, v2046); real2 v2061 = plus(v2029, v2030); store(out, 13 << shift, plus(v2061, v2062)); store(out, 77 << shift, minus(v2061, v2062)); real2 v2057 = minus(v2030, v2029); store(out, 45 << shift, minus(uplusminus(v2056), v2057)); store(out, 109 << shift, minus(uminusplus(v2056), v2057)); real2 v1754 = ctimesminusplus(reverse(v1746), ctbl[1], ctimes(v1746, ctbl[1])); real2 v1722 = ctimesminusplus(reverse(v1714), ctbl[3], ctimes(v1714, ctbl[5])); real2 v1770 = ctimesminusplus(reverse(v1762), ctbl[9], ctimes(v1762, ctbl[11])); real2 v1706 = ctimesminusplus(reverse(v1698), ctbl[7], ctimes(v1698, ctbl[13])); real2 v1988 = plus(v1706, v1770); real2 v1984 = minus(v1770, v1706); real2 v1968 = minus(v1754, v1690); real2 v1972 = plus(v1690, v1754); real2 v1985 = minusplus(v1983, v1984); real2 v1987 = minusplus(uminus(v1983), v1984); real2 v1993 = ctimesminusplus(reverse(v1985), ctbl[1], ctimes(v1985, ctbl[1])); real2 v1786 = ctimesminusplus(reverse(v1778), ctbl[5], ctimes(v1778, ctbl[3])); real2 v1967 = reverse(minus(v1722, v1786)); real2 v1973 = plus(v1722, v1786); real2 v1977 = minus(uplusminus(v1967), v1968); real2 v1981 = minus(uminusplus(v1967), v1968); store(out, 85 << shift, minus(v1977, v1993)); store(out, 21 << shift, plus(v1977, v1993)); real2 v1997 = ctimesminusplus(reverse(v1987), ctbl[1], ctimes(v1987, ctbl[0])); store(out, 117 << shift, minus(v1981, v1997)); store(out, 53 << shift, plus(v1981, v1997)); real2 v2004 = plus(v1972, v1973); real2 v2000 = minus(v1973, v1972); real2 v1999 = reverse(minus(v1988, v1989)); real2 v2005 = plus(v1988, v1989); store(out, 5 << shift, plus(v2004, v2005)); store(out, 69 << shift, minus(v2004, v2005)); store(out, 37 << shift, minus(uplusminus(v1999), v2000)); store(out, 101 << shift, minus(uminusplus(v1999), v2000)); real2 v333 = ctimesminusplus(reverse(v321), ctbl[29], ctimes(v321, ctbl[15])); real2 v613 = 
ctimesminusplus(reverse(v601), ctbl[16], ctimes(v601, ctbl[26])); real2 v2113 = reverse(minus(v333, v613)); real2 v2119 = plus(v333, v613); real2 v595 = ctimesminusplus(reverse(v583), ctbl[40], ctimes(v583, ctbl[50])); real2 v455 = ctimesminusplus(reverse(v444), ctbl[43], ctimes(v444, ctbl[42])); real2 v165 = ctimesminusplus(reverse(v151), ctbl[47], ctimes(v151, ctbl[45])); real2 v2102 = plus(v165, v455); real2 v2098 = minus(v455, v165); real2 v315 = ctimesminusplus(reverse(v305), ctbl[53], ctimes(v305, ctbl[39])); real2 v2097 = reverse(minus(v315, v595)); real2 v2103 = plus(v315, v595); real2 v261 = ctimesminusplus(reverse(v249), ctbl[17], ctimes(v249, ctbl[27])); real2 v299 = ctimesminusplus(reverse(v287), ctbl[5], ctimes(v287, ctbl[3])); real2 v523 = ctimesminusplus(reverse(v512), ctbl[31], ctimes(v512, ctbl[30])); real2 v541 = ctimesminusplus(reverse(v529), ctbl[28], ctimes(v529, ctbl[14])); real2 v2182 = plus(v261, v541); real2 v2178 = minus(v541, v261); real2 v243 = ctimesminusplus(reverse(v229), ctbl[59], ctimes(v229, ctbl[33])); real2 v667 = ctimesminusplus(reverse(v655), ctbl[34], ctimes(v655, ctbl[56])); real2 v2166 = plus(v243, v523); real2 v2162 = minus(v523, v243); real2 v386 = ctimesminusplus(reverse(v373), ctbl[37], ctimes(v373, ctbl[36])); real2 v2161 = reverse(minus(v386, v667)); real2 v2167 = plus(v386, v667); real2 v472 = ctimesminusplus(reverse(v461), ctbl[19], ctimes(v461, ctbl[18])); real2 v185 = ctimesminusplus(reverse(v171), ctbl[23], ctimes(v171, ctbl[21])); real2 v2114 = minus(v472, v185); real2 v2118 = plus(v185, v472); real2 v420 = ctimesminusplus(reverse(v409), ctbl[49], ctimes(v409, ctbl[48])); real2 v506 = ctimesminusplus(reverse(v495), ctbl[7], ctimes(v495, ctbl[6])); real2 v2199 = plus(v420, v703); real2 v2193 = reverse(minus(v420, v703)); real2 v649 = ctimesminusplus(reverse(v637), ctbl[10], ctimes(v637, ctbl[8])); real2 v223 = ctimesminusplus(reverse(v209), ctbl[11], ctimes(v209, ctbl[9])); real2 v2146 = minus(v506, v223); real2 
v2150 = plus(v223, v506); real2 v2231 = plus(v2166, v2167); real2 v2225 = reverse(minus(v2166, v2167)); real2 v685 = ctimesminusplus(reverse(v673), ctbl[22], ctimes(v673, ctbl[20])); real2 v2257 = reverse(minus(v2198, v2199)); real2 v2263 = plus(v2198, v2199); real2 v2226 = minus(v2103, v2102); real2 v2230 = plus(v2102, v2103); real2 v2294 = plus(v2230, v2231); real2 v2290 = minus(v2231, v2230); real2 v2246 = plus(v2118, v2119); real2 v2242 = minus(v2119, v2118); real2 v577 = ctimesminusplus(reverse(v565), ctbl[4], ctimes(v565, ctbl[2])); real2 v2081 = reverse(minus(v299, v577)); real2 v2087 = plus(v299, v577); real2 v403 = ctimesminusplus(reverse(v392), ctbl[25], ctimes(v392, ctbl[24])); real2 v2177 = reverse(minus(v403, v685)); real2 v2183 = plus(v403, v685); real2 v438 = ctimesminusplus(reverse(v426), ctbl[1], ctimes(v426, ctbl[0])); real2 v2086 = plus(v145, v438); real2 v2082 = minus(v438, v145); real2 v2210 = minus(v2087, v2086); real2 v2214 = plus(v2086, v2087); real2 v2247 = plus(v2182, v2183); real2 v2241 = reverse(minus(v2182, v2183)); real2 v2279 = plus(v2246, v2247); real2 v2273 = reverse(minus(v2246, v2247)); real2 v367 = ctimesminusplus(reverse(v356), ctbl[13], ctimes(v356, ctbl[12])); real2 v2151 = plus(v367, v649); real2 v2145 = reverse(minus(v367, v649)); real2 v2209 = reverse(minus(v2150, v2151)); real2 v2215 = plus(v2150, v2151); real2 v2274 = minus(v2215, v2214); real2 v2278 = plus(v2214, v2215); real2 v2310 = plus(v2278, v2279); real2 v2306 = minus(v2279, v2278); real2 v2295 = plus(v2262, v2263); real2 v2289 = reverse(minus(v2262, v2263)); real2 v2311 = plus(v2294, v2295); store(out, 3 << shift, plus(v2310, v2311)); store(out, 67 << shift, minus(v2310, v2311)); real2 v2305 = reverse(minus(v2294, v2295)); store(out, 35 << shift, minus(uplusminus(v2305), v2306)); store(out, 99 << shift, minus(uminusplus(v2305), v2306)); real2 v2287 = minus(uminusplus(v2273), v2274); real2 v2283 = minus(uplusminus(v2273), v2274); real2 v2291 = minusplus(v2289, 
v2290); real2 v2293 = minusplus(uminus(v2289), v2290); real2 v2299 = ctimesminusplus(reverse(v2291), ctbl[1], ctimes(v2291, ctbl[1])); store(out, 19 << shift, plus(v2283, v2299)); store(out, 83 << shift, minus(v2283, v2299)); real2 v2303 = ctimesminusplus(reverse(v2293), ctbl[1], ctimes(v2293, ctbl[0])); store(out, 51 << shift, plus(v2287, v2303)); store(out, 115 << shift, minus(v2287, v2303)); real2 v2229 = minusplus(uminus(v2225), v2226); real2 v2227 = minusplus(v2225, v2226); real2 v2235 = ctimesminusplus(reverse(v2227), ctbl[3], ctimes(v2227, ctbl[5])); real2 v2219 = minus(uplusminus(v2209), v2210); real2 v2223 = minus(uminusplus(v2209), v2210); real2 v2243 = minusplus(v2241, v2242); real2 v2245 = minusplus(uminus(v2241), v2242); real2 v2251 = ctimesminusplus(reverse(v2243), ctbl[1], ctimes(v2243, ctbl[1])); real2 v2331 = minus(v2251, v2219); real2 v2335 = plus(v2219, v2251); real2 v2259 = minusplus(v2257, v2258); real2 v2261 = minusplus(uminus(v2257), v2258); real2 v2267 = ctimesminusplus(reverse(v2259), ctbl[5], ctimes(v2259, ctbl[3])); real2 v2336 = plus(v2235, v2267); store(out, 75 << shift, minus(v2335, v2336)); store(out, 11 << shift, plus(v2335, v2336)); real2 v2330 = reverse(minus(v2235, v2267)); store(out, 107 << shift, minus(uminusplus(v2330), v2331)); store(out, 43 << shift, minus(uplusminus(v2330), v2331)); real2 v2239 = ctimesminusplus(reverse(v2229), ctbl[5], ctimes(v2229, ctbl[3])); real2 v2271 = ctimesminusplus(reverse(v2261), ctbl[4], ctimes(v2261, ctbl[2])); real2 v2255 = ctimesminusplus(reverse(v2245), ctbl[1], ctimes(v2245, ctbl[0])); real2 v2350 = minus(v2255, v2223); real2 v2354 = plus(v2223, v2255); real2 v2355 = plus(v2239, v2271); store(out, 91 << shift, minus(v2354, v2355)); store(out, 27 << shift, plus(v2354, v2355)); real2 v2349 = reverse(minus(v2239, v2271)); store(out, 59 << shift, minus(uplusminus(v2349), v2350)); store(out, 123 << shift, minus(uminusplus(v2349), v2350)); real2 v2091 = minus(uplusminus(v2081), v2082); real2 v2095 
= minus(uminusplus(v2081), v2082); real2 v2181 = minusplus(uminus(v2177), v2178); real2 v2179 = minusplus(v2177, v2178); real2 v2101 = minusplus(uminus(v2097), v2098); real2 v2099 = minusplus(v2097, v2098); real2 v2165 = minusplus(uminus(v2161), v2162); real2 v2163 = minusplus(v2161, v2162); real2 v2147 = minusplus(v2145, v2146); real2 v2149 = minusplus(uminus(v2145), v2146); real2 v2155 = ctimesminusplus(reverse(v2147), ctbl[1], ctimes(v2147, ctbl[1])); real2 v2197 = minusplus(uminus(v2193), v2194); real2 v2195 = minusplus(v2193, v2194); real2 v2117 = minusplus(uminus(v2113), v2114); real2 v2115 = minusplus(v2113, v2114); real2 v2123 = ctimesminusplus(reverse(v2115), ctbl[3], ctimes(v2115, ctbl[5])); real2 v2171 = ctimesminusplus(reverse(v2163), ctbl[9], ctimes(v2163, ctbl[11])); real2 v2107 = ctimesminusplus(reverse(v2099), ctbl[7], ctimes(v2099, ctbl[13])); real2 v2389 = plus(v2107, v2171); real2 v2385 = minus(v2171, v2107); real2 v2187 = ctimesminusplus(reverse(v2179), ctbl[5], ctimes(v2179, ctbl[3])); real2 v2374 = plus(v2123, v2187); real2 v2368 = reverse(minus(v2123, v2187)); real2 v2369 = minus(v2155, v2091); real2 v2373 = plus(v2091, v2155); real2 v2405 = plus(v2373, v2374); real2 v2401 = minus(v2374, v2373); real2 v2203 = ctimesminusplus(reverse(v2195), ctbl[13], ctimes(v2195, ctbl[7])); real2 v2131 = minusplus(v2129, v2130); real2 v2133 = minusplus(uminus(v2129), v2130); real2 v2139 = ctimesminusplus(reverse(v2131), ctbl[11], ctimes(v2131, ctbl[9])); real2 v2390 = plus(v2139, v2203); real2 v2384 = reverse(minus(v2139, v2203)); real2 v2400 = reverse(minus(v2389, v2390)); store(out, 103 << shift, minus(uminusplus(v2400), v2401)); store(out, 39 << shift, minus(uplusminus(v2400), v2401)); real2 v2406 = plus(v2389, v2390); store(out, 71 << shift, minus(v2405, v2406)); store(out, 7 << shift, plus(v2405, v2406)); real2 v2382 = minus(uminusplus(v2368), v2369); real2 v2378 = minus(uplusminus(v2368), v2369); real2 v2388 = minusplus(uminus(v2384), v2385); real2 
v2386 = minusplus(v2384, v2385); real2 v2398 = ctimesminusplus(reverse(v2388), ctbl[1], ctimes(v2388, ctbl[0])); store(out, 119 << shift, minus(v2382, v2398)); store(out, 55 << shift, plus(v2382, v2398)); real2 v2394 = ctimesminusplus(reverse(v2386), ctbl[1], ctimes(v2386, ctbl[1])); store(out, 87 << shift, minus(v2378, v2394)); store(out, 23 << shift, plus(v2378, v2394)); real2 v2127 = ctimesminusplus(reverse(v2117), ctbl[5], ctimes(v2117, ctbl[3])); real2 v2175 = ctimesminusplus(reverse(v2165), ctbl[7], ctimes(v2165, ctbl[6])); real2 v2111 = ctimesminusplus(reverse(v2101), ctbl[11], ctimes(v2101, ctbl[9])); real2 v2442 = minus(v2175, v2111); real2 v2446 = plus(v2111, v2175); real2 v2207 = ctimesminusplus(reverse(v2197), ctbl[10], ctimes(v2197, ctbl[8])); real2 v2159 = ctimesminusplus(reverse(v2149), ctbl[1], ctimes(v2149, ctbl[0])); real2 v2430 = plus(v2095, v2159); real2 v2426 = minus(v2159, v2095); real2 v2191 = ctimesminusplus(reverse(v2181), ctbl[4], ctimes(v2181, ctbl[2])); real2 v2143 = ctimesminusplus(reverse(v2133), ctbl[13], ctimes(v2133, ctbl[12])); real2 v2447 = plus(v2143, v2207); real2 v2441 = reverse(minus(v2143, v2207)); real2 v2425 = reverse(minus(v2127, v2191)); real2 v2431 = plus(v2127, v2191); real2 v2458 = minus(v2431, v2430); real2 v2462 = plus(v2430, v2431); real2 v2457 = reverse(minus(v2446, v2447)); store(out, 47 << shift, minus(uplusminus(v2457), v2458)); store(out, 111 << shift, minus(uminusplus(v2457), v2458)); real2 v2463 = plus(v2446, v2447); store(out, 79 << shift, minus(v2462, v2463)); store(out, 15 << shift, plus(v2462, v2463)); real2 v2435 = minus(uplusminus(v2425), v2426); real2 v2439 = minus(uminusplus(v2425), v2426); real2 v2445 = minusplus(uminus(v2441), v2442); real2 v2443 = minusplus(v2441, v2442); real2 v2451 = ctimesminusplus(reverse(v2443), ctbl[1], ctimes(v2443, ctbl[1])); store(out, 31 << shift, plus(v2435, v2451)); store(out, 95 << shift, minus(v2435, v2451)); real2 v2455 = ctimesminusplus(reverse(v2445), ctbl[1], 
ctimes(v2445, ctbl[0])); store(out, 127 << shift, minus(v2439, v2455)); store(out, 63 << shift, plus(v2439, v2455)); // Pres : 68124 } } ALIGNED(8192) void dft128b_%CONFIG%_%ISA%(real *RESTRICT out0, const real *RESTRICT in0, const int shift) { const int k = 1 << (shift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + i0*2; const real *in = in0 + i0*2; // Pres : 135650 real2 v51 = load(in, 49 << shift); real2 v115 = load(in, 113 << shift); real2 v456 = plus(v51, v115); real2 v450 = reverse(minus(v115, v51)); real2 v83 = load(in, 81 << shift); real2 v19 = load(in, 17 << shift); real2 v455 = plus(v19, v83); real2 v451 = minus(v83, v19); real2 v721 = reverse(minus(v456, v455)); real2 v727 = plus(v455, v456); real2 v452 = minusplus(v450, v451); real2 v454 = minusplus(uminus(v450), v451); real2 v462 = ctimesminusplus(reverse(v452), ctbl[58], ctimes(v452, ctbl[59])); real2 v466 = ctimesminusplus(reverse(v454), ctbl[48], ctimes(v454, ctbl[42])); real2 v35 = load(in, 33 << shift); real2 v99 = load(in, 97 << shift); real2 v147 = reverse(minus(v99, v35)); real2 v153 = plus(v35, v99); real2 v3 = load(in, 1 << shift); real2 v67 = load(in, 65 << shift); real2 v152 = plus(v3, v67); real2 v148 = minus(v67, v3); real2 v722 = minus(v153, v152); real2 v726 = plus(v152, v153); real2 v982 = plus(v726, v727); real2 v978 = minus(v727, v726); real2 v149 = minusplus(v147, v148); real2 v151 = minusplus(uminus(v147), v148); real2 v165 = ctimesminusplus(reverse(v151), ctbl[44], ctimes(v151, ctbl[45])); real2 v159 = ctimesminusplus(reverse(v149), ctbl[60], ctimes(v149, ctbl[61])); real2 v723 = minusplus(v721, v722); real2 v725 = minusplus(uminus(v721), v722); real2 v2102 = plus(v165, v466); real2 v2098 = minus(v466, v165); real2 v731 = ctimesminusplus(reverse(v723), ctbl[28], ctimes(v723, ctbl[29])); real2 v1697 = minus(v462, v159); real2 v1701 = plus(v159, v462); real2 v735 = ctimesminusplus(reverse(v725), ctbl[20], 
ctimes(v725, ctbl[21])); real2 v75 = load(in, 73 << shift); real2 v11 = load(in, 9 << shift); real2 v304 = minus(v75, v11); real2 v308 = plus(v11, v75); real2 v107 = load(in, 105 << shift); real2 v43 = load(in, 41 << shift); real2 v309 = plus(v43, v107); real2 v303 = reverse(minus(v107, v43)); real2 v854 = plus(v308, v309); real2 v850 = minus(v309, v308); real2 v307 = minusplus(uminus(v303), v304); real2 v305 = minusplus(v303, v304); real2 v313 = ctimesminusplus(reverse(v305), ctbl[56], ctimes(v305, ctbl[57])); real2 v319 = ctimesminusplus(reverse(v307), ctbl[38], ctimes(v307, ctbl[39])); real2 v27 = load(in, 25 << shift); real2 v91 = load(in, 89 << shift); real2 v591 = plus(v27, v91); real2 v587 = minus(v91, v27); real2 v59 = load(in, 57 << shift); real2 v123 = load(in, 121 << shift); real2 v592 = plus(v59, v123); real2 v586 = reverse(minus(v123, v59)); real2 v849 = reverse(minus(v592, v591)); real2 v855 = plus(v591, v592); real2 v983 = plus(v854, v855); real2 v977 = reverse(minus(v855, v854)); real2 v590 = minusplus(uminus(v586), v587); real2 v588 = minusplus(v586, v587); real2 v597 = ctimesminusplus(reverse(v588), ctbl[54], ctimes(v588, ctbl[55])); real2 v1110 = plus(v982, v983); real2 v1106 = minus(v983, v982); real2 v853 = minusplus(uminus(v849), v850); real2 v851 = minusplus(v849, v850); real2 v859 = ctimesminusplus(reverse(v851), ctbl[26], ctimes(v851, ctbl[27])); real2 v1702 = plus(v313, v597); real2 v1696 = reverse(minus(v597, v313)); real2 v1825 = minus(v1702, v1701); real2 v1829 = plus(v1701, v1702); real2 v1700 = minusplus(uminus(v1696), v1697); real2 v1698 = minusplus(v1696, v1697); real2 v1710 = ctimesminusplus(reverse(v1700), ctbl[8], ctimes(v1700, ctbl[9])); real2 v1706 = ctimesminusplus(reverse(v1698), ctbl[12], ctimes(v1698, ctbl[13])); real2 v1379 = minus(v859, v731); real2 v1383 = plus(v731, v859); real2 v863 = ctimesminusplus(reverse(v853), ctbl[24], ctimes(v853, ctbl[18])); real2 v1538 = minus(v863, v735); real2 v1542 = plus(v735, v863); real2 
v981 = minusplus(uminus(v977), v978); real2 v979 = minusplus(v977, v978); real2 v987 = ctimesminusplus(reverse(v979), ctbl[12], ctimes(v979, ctbl[13])); real2 v601 = ctimesminusplus(reverse(v590), ctbl[51], ctimes(v590, ctbl[50])); real2 v991 = ctimesminusplus(reverse(v981), ctbl[8], ctimes(v981, ctbl[9])); real2 v2103 = plus(v319, v601); real2 v2097 = reverse(minus(v601, v319)); real2 v2226 = minus(v2103, v2102); real2 v2230 = plus(v2102, v2103); real2 v2101 = minusplus(uminus(v2097), v2098); real2 v2099 = minusplus(v2097, v2098); real2 v2107 = ctimesminusplus(reverse(v2099), ctbl[12], ctimes(v2099, ctbl[13])); real2 v127 = load(in, 125 << shift); real2 v63 = load(in, 61 << shift); real2 v660 = plus(v63, v127); real2 v654 = reverse(minus(v127, v63)); real2 v31 = load(in, 29 << shift); real2 v95 = load(in, 93 << shift); real2 v659 = plus(v31, v95); real2 v655 = minus(v95, v31); real2 v913 = reverse(minus(v660, v659)); real2 v919 = plus(v659, v660); real2 v658 = minusplus(uminus(v654), v655); real2 v656 = minusplus(v654, v655); real2 v666 = ctimesminusplus(reverse(v656), ctbl[46], ctimes(v656, ctbl[47])); real2 v670 = ctimesminusplus(reverse(v658), ctbl[57], ctimes(v658, ctbl[56])); real2 v47 = load(in, 45 << shift); real2 v111 = load(in, 109 << shift); real2 v377 = reverse(minus(v111, v47)); real2 v383 = plus(v47, v111); real2 v15 = load(in, 13 << shift); real2 v79 = load(in, 77 << shift); real2 v378 = minus(v79, v15); real2 v382 = plus(v15, v79); real2 v379 = minusplus(v377, v378); real2 v381 = minusplus(uminus(v377), v378); real2 v394 = ctimesminusplus(reverse(v381), ctbl[54], ctimes(v381, ctbl[36])); real2 v2167 = plus(v394, v670); real2 v2161 = reverse(minus(v670, v394)); real2 v914 = minus(v383, v382); real2 v918 = plus(v382, v383); real2 v1047 = plus(v918, v919); real2 v1041 = reverse(minus(v919, v918)); real2 v917 = minusplus(uminus(v913), v914); real2 v915 = minusplus(v913, v914); real2 v923 = ctimesminusplus(reverse(v915), ctbl[22], ctimes(v915, 
ctbl[23])); real2 v389 = ctimesminusplus(reverse(v379), ctbl[48], ctimes(v379, ctbl[49])); real2 v1760 = reverse(minus(v666, v389)); real2 v1766 = plus(v389, v666); real2 v927 = ctimesminusplus(reverse(v917), ctbl[27], ctimes(v917, ctbl[26])); real2 v87 = load(in, 85 << shift); real2 v23 = load(in, 21 << shift); real2 v520 = minus(v87, v23); real2 v524 = plus(v23, v87); real2 v55 = load(in, 53 << shift); real2 v119 = load(in, 117 << shift); real2 v519 = reverse(minus(v119, v55)); real2 v525 = plus(v55, v119); real2 v523 = minusplus(uminus(v519), v520); real2 v521 = minusplus(v519, v520); real2 v529 = ctimesminusplus(reverse(v521), ctbl[50], ctimes(v521, ctbl[51])); real2 v533 = ctimesminusplus(reverse(v523), ctbl[60], ctimes(v523, ctbl[30])); real2 v791 = plus(v524, v525); real2 v785 = reverse(minus(v525, v524)); real2 v39 = load(in, 37 << shift); real2 v103 = load(in, 101 << shift); real2 v231 = plus(v39, v103); real2 v225 = reverse(minus(v103, v39)); real2 v7 = load(in, 5 << shift); real2 v71 = load(in, 69 << shift); real2 v226 = minus(v71, v7); real2 v230 = plus(v7, v71); real2 v227 = minusplus(v225, v226); real2 v229 = minusplus(uminus(v225), v226); real2 v243 = ctimesminusplus(reverse(v229), ctbl[32], ctimes(v229, ctbl[33])); real2 v2162 = minus(v533, v243); real2 v2166 = plus(v243, v533); real2 v2231 = plus(v2166, v2167); real2 v2225 = reverse(minus(v2167, v2166)); real2 v237 = ctimesminusplus(reverse(v227), ctbl[52], ctimes(v227, ctbl[53])); real2 v1761 = minus(v529, v237); real2 v1765 = plus(v237, v529); real2 v1824 = reverse(minus(v1766, v1765)); real2 v1830 = plus(v1765, v1766); real2 v790 = plus(v230, v231); real2 v786 = minus(v231, v230); real2 v1826 = minusplus(v1824, v1825); real2 v1828 = minusplus(uminus(v1824), v1825); real2 v1764 = minusplus(uminus(v1760), v1761); real2 v1762 = minusplus(v1760, v1761); real2 v1889 = minus(v1830, v1829); real2 v1893 = plus(v1829, v1830); real2 v1838 = ctimesminusplus(reverse(v1828), ctbl[2], ctimes(v1828, ctbl[3])); 
real2 v1774 = ctimesminusplus(reverse(v1764), ctbl[12], ctimes(v1764, ctbl[6])); real2 v2041 = minus(v1774, v1710); real2 v2045 = plus(v1710, v1774); real2 v1770 = ctimesminusplus(reverse(v1762), ctbl[10], ctimes(v1762, ctbl[11])); real2 v1988 = plus(v1706, v1770); real2 v1984 = minus(v1770, v1706); real2 v1834 = ctimesminusplus(reverse(v1826), ctbl[4], ctimes(v1826, ctbl[5])); real2 v787 = minusplus(v785, v786); real2 v789 = minusplus(uminus(v785), v786); real2 v799 = ctimesminusplus(reverse(v789), ctbl[14], ctimes(v789, ctbl[15])); real2 v1046 = plus(v790, v791); real2 v1042 = minus(v791, v790); real2 v1043 = minusplus(v1041, v1042); real2 v1045 = minusplus(uminus(v1041), v1042); real2 v1537 = reverse(minus(v927, v799)); real2 v1543 = plus(v799, v927); real2 v1606 = plus(v1542, v1543); real2 v1602 = minus(v1543, v1542); real2 v795 = ctimesminusplus(reverse(v787), ctbl[24], ctimes(v787, ctbl[25])); real2 v1105 = reverse(minus(v1047, v1046)); real2 v1111 = plus(v1046, v1047); real2 v1384 = plus(v795, v923); real2 v1378 = reverse(minus(v923, v795)); real2 v1107 = minusplus(v1105, v1106); real2 v1109 = minusplus(uminus(v1105), v1106); real2 v1119 = ctimesminusplus(reverse(v1109), ctbl[2], ctimes(v1109, ctbl[3])); real2 v1382 = minusplus(uminus(v1378), v1379); real2 v1380 = minusplus(v1378, v1379); real2 v1388 = ctimesminusplus(reverse(v1380), ctbl[4], ctimes(v1380, ctbl[5])); real2 v1115 = ctimesminusplus(reverse(v1107), ctbl[4], ctimes(v1107, ctbl[5])); real2 v1392 = ctimesminusplus(reverse(v1382), ctbl[2], ctimes(v1382, ctbl[3])); real2 v1443 = minus(v1384, v1383); real2 v1447 = plus(v1383, v1384); real2 v1539 = minusplus(v1537, v1538); real2 v1541 = minusplus(uminus(v1537), v1538); real2 v1551 = ctimesminusplus(reverse(v1541), ctbl[2], ctimes(v1541, ctbl[3])); real2 v1547 = ctimesminusplus(reverse(v1539), ctbl[4], ctimes(v1539, ctbl[5])); real2 v1051 = ctimesminusplus(reverse(v1043), ctbl[10], ctimes(v1043, ctbl[11])); real2 v2290 = minus(v2231, v2230); real2 
v2294 = plus(v2230, v2231); real2 v1174 = plus(v1110, v1111); real2 v1170 = minus(v1111, v1110); real2 v1265 = minus(v1051, v987); real2 v1269 = plus(v987, v1051); real2 v1055 = ctimesminusplus(reverse(v1045), ctbl[12], ctimes(v1045, ctbl[6])); real2 v1322 = minus(v1055, v991); real2 v1326 = plus(v991, v1055); real2 v129 = load(in, 127 << shift); real2 v65 = load(in, 63 << shift); real2 v688 = reverse(minus(v129, v65)); real2 v694 = plus(v65, v129); real2 v33 = load(in, 31 << shift); real2 v97 = load(in, 95 << shift); real2 v689 = minus(v97, v33); real2 v693 = plus(v33, v97); real2 v951 = plus(v693, v694); real2 v945 = reverse(minus(v694, v693)); real2 v692 = minusplus(uminus(v688), v689); real2 v690 = minusplus(v688, v689); real2 v699 = ctimesminusplus(reverse(v690), ctbl[30], ctimes(v690, ctbl[31])); real2 v17 = load(in, 15 << shift); real2 v81 = load(in, 79 << shift); real2 v420 = plus(v17, v81); real2 v416 = minus(v81, v17); real2 v113 = load(in, 111 << shift); real2 v49 = load(in, 47 << shift); real2 v415 = reverse(minus(v113, v49)); real2 v421 = plus(v49, v113); real2 v419 = minusplus(uminus(v415), v416); real2 v417 = minusplus(v415, v416); real2 v425 = ctimesminusplus(reverse(v417), ctbl[32], ctimes(v417, ctbl[33])); real2 v1792 = reverse(minus(v699, v425)); real2 v1798 = plus(v425, v699); real2 v950 = plus(v420, v421); real2 v946 = minus(v421, v420); real2 v947 = minusplus(v945, v946); real2 v949 = minusplus(uminus(v945), v946); real2 v959 = ctimesminusplus(reverse(v949), ctbl[21], ctimes(v949, ctbl[20])); real2 v955 = ctimesminusplus(reverse(v947), ctbl[14], ctimes(v947, ctbl[15])); real2 v1079 = plus(v950, v951); real2 v1073 = reverse(minus(v951, v950)); real2 v73 = load(in, 71 << shift); real2 v9 = load(in, 7 << shift); real2 v268 = plus(v9, v73); real2 v264 = minus(v73, v9); real2 v105 = load(in, 103 << shift); real2 v41 = load(in, 39 << shift); real2 v269 = plus(v41, v105); real2 v263 = reverse(minus(v105, v41)); real2 v818 = minus(v269, v268); real2 
v822 = plus(v268, v269); real2 v267 = minusplus(uminus(v263), v264); real2 v265 = minusplus(v263, v264); real2 v275 = ctimesminusplus(reverse(v265), ctbl[36], ctimes(v265, ctbl[37])); real2 v89 = load(in, 87 << shift); real2 v25 = load(in, 23 << shift); real2 v557 = plus(v25, v89); real2 v553 = minus(v89, v25); real2 v121 = load(in, 119 << shift); real2 v57 = load(in, 55 << shift); real2 v558 = plus(v57, v121); real2 v552 = reverse(minus(v121, v57)); real2 v823 = plus(v557, v558); real2 v817 = reverse(minus(v558, v557)); real2 v1078 = plus(v822, v823); real2 v1074 = minus(v823, v822); real2 v819 = minusplus(v817, v818); real2 v821 = minusplus(uminus(v817), v818); real2 v1077 = minusplus(uminus(v1073), v1074); real2 v1075 = minusplus(v1073, v1074); real2 v1083 = ctimesminusplus(reverse(v1075), ctbl[6], ctimes(v1075, ctbl[7])); real2 v1087 = ctimesminusplus(reverse(v1077), ctbl[9], ctimes(v1077, ctbl[8])); real2 v556 = minusplus(uminus(v552), v553); real2 v554 = minusplus(v552, v553); real2 v564 = ctimesminusplus(reverse(v554), ctbl[34], ctimes(v554, ctbl[35])); real2 v1793 = minus(v564, v275); real2 v1797 = plus(v275, v564); real2 v1862 = plus(v1797, v1798); real2 v1856 = reverse(minus(v1798, v1797)); real2 v1794 = minusplus(v1792, v1793); real2 v1796 = minusplus(uminus(v1792), v1793); real2 v827 = ctimesminusplus(reverse(v819), ctbl[16], ctimes(v819, ctbl[17])); real2 v1410 = reverse(minus(v955, v827)); real2 v1416 = plus(v827, v955); real2 v1143 = plus(v1078, v1079); real2 v1137 = reverse(minus(v1079, v1078)); real2 v831 = ctimesminusplus(reverse(v821), ctbl[18], ctimes(v821, ctbl[24])); real2 v1575 = plus(v831, v959); real2 v1569 = reverse(minus(v959, v831)); real2 v5 = load(in, 3 << shift); real2 v69 = load(in, 67 << shift); real2 v188 = minus(v69, v5); real2 v192 = plus(v5, v69); real2 v101 = load(in, 99 << shift); real2 v37 = load(in, 35 << shift); real2 v193 = plus(v37, v101); real2 v187 = reverse(minus(v101, v37)); real2 v754 = minus(v193, v192); real2 v758 
= plus(v192, v193); real2 v189 = minusplus(v187, v188); real2 v191 = minusplus(uminus(v187), v188); real2 v197 = ctimesminusplus(reverse(v189), ctbl[44], ctimes(v189, ctbl[45])); real2 v21 = load(in, 19 << shift); real2 v85 = load(in, 83 << shift); real2 v485 = minus(v85, v21); real2 v489 = plus(v21, v85); real2 v53 = load(in, 51 << shift); real2 v117 = load(in, 115 << shift); real2 v484 = reverse(minus(v117, v53)); real2 v490 = plus(v53, v117); real2 v753 = reverse(minus(v490, v489)); real2 v759 = plus(v489, v490); real2 v757 = minusplus(uminus(v753), v754); real2 v755 = minusplus(v753, v754); real2 v767 = ctimesminusplus(reverse(v757), ctbl[26], ctimes(v757, ctbl[27])); real2 v763 = ctimesminusplus(reverse(v755), ctbl[20], ctimes(v755, ctbl[21])); real2 v486 = minusplus(v484, v485); real2 v488 = minusplus(uminus(v484), v485); real2 v495 = ctimesminusplus(reverse(v486), ctbl[42], ctimes(v486, ctbl[43])); real2 v1729 = minus(v495, v197); real2 v1733 = plus(v197, v495); real2 v1014 = plus(v758, v759); real2 v1010 = minus(v759, v758); real2 v13 = load(in, 11 << shift); real2 v77 = load(in, 75 << shift); real2 v342 = minus(v77, v13); real2 v346 = plus(v13, v77); real2 v45 = load(in, 43 << shift); real2 v109 = load(in, 107 << shift); real2 v347 = plus(v45, v109); real2 v341 = reverse(minus(v109, v45)); real2 v345 = minusplus(uminus(v341), v342); real2 v343 = minusplus(v341, v342); real2 v882 = minus(v347, v346); real2 v886 = plus(v346, v347); real2 v353 = ctimesminusplus(reverse(v343), ctbl[40], ctimes(v343, ctbl[41])); real2 v125 = load(in, 123 << shift); real2 v61 = load(in, 59 << shift); real2 v621 = reverse(minus(v125, v61)); real2 v627 = plus(v61, v125); real2 v93 = load(in, 91 << shift); real2 v29 = load(in, 27 << shift); real2 v622 = minus(v93, v29); real2 v626 = plus(v29, v93); real2 v887 = plus(v626, v627); real2 v881 = reverse(minus(v627, v626)); real2 v885 = minusplus(uminus(v881), v882); real2 v883 = minusplus(v881, v882); real2 v891 = 
ctimesminusplus(reverse(v883), ctbl[18], ctimes(v883, ctbl[19])); real2 v1009 = reverse(minus(v887, v886)); real2 v1015 = plus(v886, v887); real2 v1415 = plus(v763, v891); real2 v1411 = minus(v891, v763); real2 v895 = ctimesminusplus(reverse(v885), ctbl[15], ctimes(v885, ctbl[14])); real2 v1570 = minus(v895, v767); real2 v1574 = plus(v767, v895); real2 v1142 = plus(v1014, v1015); real2 v1138 = minus(v1015, v1014); real2 v1011 = minusplus(v1009, v1010); real2 v1013 = minusplus(uminus(v1009), v1010); real2 v1023 = ctimesminusplus(reverse(v1013), ctbl[6], ctimes(v1013, ctbl[12])); real2 v1019 = ctimesminusplus(reverse(v1011), ctbl[8], ctimes(v1011, ctbl[9])); real2 v1321 = reverse(minus(v1087, v1023)); real2 v1327 = plus(v1023, v1087); real2 v1414 = minusplus(uminus(v1410), v1411); real2 v1412 = minusplus(v1410, v1411); real2 v1424 = ctimesminusplus(reverse(v1414), ctbl[3], ctimes(v1414, ctbl[2])); real2 v1508 = plus(v1392, v1424); real2 v1502 = reverse(minus(v1424, v1392)); real2 v1264 = reverse(minus(v1083, v1019)); real2 v1270 = plus(v1019, v1083); real2 v1286 = plus(v1269, v1270); real2 v1280 = reverse(minus(v1270, v1269)); real2 v1268 = minusplus(uminus(v1264), v1265); real2 v1266 = minusplus(v1264, v1265); real2 v1141 = minusplus(uminus(v1137), v1138); real2 v1139 = minusplus(v1137, v1138); real2 v1147 = ctimesminusplus(reverse(v1139), ctbl[2], ctimes(v1139, ctbl[3])); real2 v1278 = ctimesminusplus(reverse(v1268), ctbl[0], ctimes(v1268, ctbl[0])); real2 v1151 = ctimesminusplus(reverse(v1141), ctbl[3], ctimes(v1141, ctbl[2])); real2 v1235 = plus(v1119, v1151); real2 v1229 = reverse(minus(v1151, v1119)); real2 v1420 = ctimesminusplus(reverse(v1412), ctbl[2], ctimes(v1412, ctbl[3])); real2 v1483 = reverse(minus(v1420, v1388)); real2 v1489 = plus(v1388, v1420); real2 v1274 = ctimesminusplus(reverse(v1266), ctbl[0], ctimes(v1266, ctbl[1])); real2 v1607 = plus(v1574, v1575); real2 v1601 = reverse(minus(v1575, v1574)); real2 v1605 = minusplus(uminus(v1601), v1602); 
real2 v1603 = minusplus(v1601, v1602); real2 v1175 = plus(v1142, v1143); real2 v1169 = reverse(minus(v1143, v1142)); real2 v1171 = minusplus(v1169, v1170); real2 v1173 = minusplus(uminus(v1169), v1170); real2 v1179 = ctimesminusplus(reverse(v1171), ctbl[0], ctimes(v1171, ctbl[1])); real2 v1191 = plus(v1174, v1175); real2 v1185 = reverse(minus(v1175, v1174)); real2 v1325 = minusplus(uminus(v1321), v1322); real2 v1323 = minusplus(v1321, v1322); real2 v1331 = ctimesminusplus(reverse(v1323), ctbl[0], ctimes(v1323, ctbl[1])); real2 v1448 = plus(v1415, v1416); real2 v1442 = reverse(minus(v1416, v1415)); real2 v1446 = minusplus(uminus(v1442), v1443); real2 v1444 = minusplus(v1442, v1443); real2 v1452 = ctimesminusplus(reverse(v1444), ctbl[0], ctimes(v1444, ctbl[1])); real2 v1464 = plus(v1447, v1448); real2 v1458 = reverse(minus(v1448, v1447)); real2 v1335 = ctimesminusplus(reverse(v1325), ctbl[0], ctimes(v1325, ctbl[0])); real2 v1337 = reverse(minus(v1327, v1326)); real2 v1343 = plus(v1326, v1327); real2 v1183 = ctimesminusplus(reverse(v1173), ctbl[0], ctimes(v1173, ctbl[0])); real2 v1456 = ctimesminusplus(reverse(v1446), ctbl[0], ctimes(v1446, ctbl[0])); real2 v1210 = reverse(minus(v1147, v1115)); real2 v1216 = plus(v1115, v1147); real2 v1623 = plus(v1606, v1607); real2 v1617 = reverse(minus(v1607, v1606)); real2 v1571 = minusplus(v1569, v1570); real2 v1573 = minusplus(uminus(v1569), v1570); real2 v1583 = ctimesminusplus(reverse(v1573), ctbl[3], ctimes(v1573, ctbl[2])); real2 v1661 = reverse(minus(v1583, v1551)); real2 v1667 = plus(v1551, v1583); real2 v1611 = ctimesminusplus(reverse(v1603), ctbl[0], ctimes(v1603, ctbl[1])); real2 v1615 = ctimesminusplus(reverse(v1605), ctbl[0], ctimes(v1605, ctbl[0])); real2 v1579 = ctimesminusplus(reverse(v1571), ctbl[2], ctimes(v1571, ctbl[3])); real2 v1648 = plus(v1547, v1579); real2 v1642 = reverse(minus(v1579, v1547)); real2 v92 = load(in, 90 << shift); real2 v28 = load(in, 26 << shift); real2 v604 = minus(v92, v28); real2 v608 = 
plus(v28, v92); real2 v60 = load(in, 58 << shift); real2 v124 = load(in, 122 << shift); real2 v603 = reverse(minus(v124, v60)); real2 v609 = plus(v60, v124); real2 v871 = plus(v608, v609); real2 v865 = reverse(minus(v609, v608)); real2 v76 = load(in, 74 << shift); real2 v12 = load(in, 10 << shift); real2 v322 = minus(v76, v12); real2 v326 = plus(v12, v76); real2 v108 = load(in, 106 << shift); real2 v44 = load(in, 42 << shift); real2 v321 = reverse(minus(v108, v44)); real2 v327 = plus(v44, v108); real2 v870 = plus(v326, v327); real2 v866 = minus(v327, v326); real2 v993 = reverse(minus(v871, v870)); real2 v999 = plus(v870, v871); real2 v869 = minusplus(uminus(v865), v866); real2 v867 = minusplus(v865, v866); real2 v875 = ctimesminusplus(reverse(v867), ctbl[10], ctimes(v867, ctbl[11])); real2 v879 = ctimesminusplus(reverse(v869), ctbl[12], ctimes(v869, ctbl[6])); real2 v36 = load(in, 34 << shift); real2 v100 = load(in, 98 << shift); real2 v167 = reverse(minus(v100, v36)); real2 v173 = plus(v36, v100); real2 v68 = load(in, 66 << shift); real2 v4 = load(in, 2 << shift); real2 v168 = minus(v68, v4); real2 v172 = plus(v4, v68); real2 v742 = plus(v172, v173); real2 v738 = minus(v173, v172); real2 v52 = load(in, 50 << shift); real2 v116 = load(in, 114 << shift); real2 v468 = reverse(minus(v116, v52)); real2 v474 = plus(v52, v116); real2 v84 = load(in, 82 << shift); real2 v20 = load(in, 18 << shift); real2 v469 = minus(v84, v20); real2 v473 = plus(v20, v84); real2 v743 = plus(v473, v474); real2 v737 = reverse(minus(v474, v473)); real2 v739 = minusplus(v737, v738); real2 v741 = minusplus(uminus(v737), v738); real2 v747 = ctimesminusplus(reverse(v739), ctbl[12], ctimes(v739, ctbl[13])); real2 v998 = plus(v742, v743); real2 v994 = minus(v743, v742); real2 v1399 = plus(v747, v875); real2 v1395 = minus(v875, v747); real2 v1122 = minus(v999, v998); real2 v1126 = plus(v998, v999); real2 v997 = minusplus(uminus(v993), v994); real2 v995 = minusplus(v993, v994); real2 v1003 = 
ctimesminusplus(reverse(v995), ctbl[4], ctimes(v995, ctbl[5])); real2 v1007 = ctimesminusplus(reverse(v997), ctbl[2], ctimes(v997, ctbl[3])); real2 v80 = load(in, 78 << shift); real2 v16 = load(in, 14 << shift); real2 v397 = minus(v80, v16); real2 v401 = plus(v16, v80); real2 v48 = load(in, 46 << shift); real2 v112 = load(in, 110 << shift); real2 v402 = plus(v48, v112); real2 v396 = reverse(minus(v112, v48)); real2 v934 = plus(v401, v402); real2 v930 = minus(v402, v401); real2 v96 = load(in, 94 << shift); real2 v32 = load(in, 30 << shift); real2 v673 = minus(v96, v32); real2 v677 = plus(v32, v96); real2 v64 = load(in, 62 << shift); real2 v128 = load(in, 126 << shift); real2 v678 = plus(v64, v128); real2 v672 = reverse(minus(v128, v64)); real2 v929 = reverse(minus(v678, v677)); real2 v935 = plus(v677, v678); real2 v933 = minusplus(uminus(v929), v930); real2 v931 = minusplus(v929, v930); real2 v1057 = reverse(minus(v935, v934)); real2 v1063 = plus(v934, v935); real2 v939 = ctimesminusplus(reverse(v931), ctbl[6], ctimes(v931, ctbl[7])); real2 v104 = load(in, 102 << shift); real2 v40 = load(in, 38 << shift); real2 v251 = plus(v40, v104); real2 v245 = reverse(minus(v104, v40)); real2 v72 = load(in, 70 << shift); real2 v8 = load(in, 6 << shift); real2 v246 = minus(v72, v8); real2 v250 = plus(v8, v72); real2 v802 = minus(v251, v250); real2 v806 = plus(v250, v251); real2 v88 = load(in, 86 << shift); real2 v24 = load(in, 22 << shift); real2 v540 = plus(v24, v88); real2 v536 = minus(v88, v24); real2 v120 = load(in, 118 << shift); real2 v56 = load(in, 54 << shift); real2 v541 = plus(v56, v120); real2 v535 = reverse(minus(v120, v56)); real2 v807 = plus(v540, v541); real2 v801 = reverse(minus(v541, v540)); real2 v1062 = plus(v806, v807); real2 v1058 = minus(v807, v806); real2 v1059 = minusplus(v1057, v1058); real2 v1061 = minusplus(uminus(v1057), v1058); real2 v1127 = plus(v1062, v1063); real2 v1121 = reverse(minus(v1063, v1062)); real2 v1071 = ctimesminusplus(reverse(v1061), 
ctbl[3], ctimes(v1061, ctbl[2])); real2 v1067 = ctimesminusplus(reverse(v1059), ctbl[2], ctimes(v1059, ctbl[3])); real2 v1153 = reverse(minus(v1127, v1126)); real2 v1159 = plus(v1126, v1127); real2 v1123 = minusplus(v1121, v1122); real2 v1125 = minusplus(uminus(v1121), v1122); real2 v1254 = plus(v1003, v1067); real2 v1248 = reverse(minus(v1067, v1003)); real2 v1131 = ctimesminusplus(reverse(v1123), ctbl[0], ctimes(v1123, ctbl[1])); real2 v1305 = reverse(minus(v1071, v1007)); real2 v1311 = plus(v1007, v1071); real2 v1135 = ctimesminusplus(reverse(v1125), ctbl[0], ctimes(v1125, ctbl[0])); real2 v42 = load(in, 40 << shift); real2 v106 = load(in, 104 << shift); real2 v283 = reverse(minus(v106, v42)); real2 v289 = plus(v42, v106); real2 v10 = load(in, 8 << shift); real2 v74 = load(in, 72 << shift); real2 v284 = minus(v74, v10); real2 v288 = plus(v10, v74); real2 v838 = plus(v288, v289); real2 v834 = minus(v289, v288); real2 v66 = load(in, 64 << shift); real2 v2 = load(in, 0 << shift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << shift); real2 v34 = load(in, 32 << shift); real2 v131 = reverse(minus(v98, v34)); real2 v137 = plus(v34, v98); real2 v706 = minus(v137, v136); real2 v710 = plus(v136, v137); real2 v122 = load(in, 120 << shift); real2 v58 = load(in, 56 << shift); real2 v570 = reverse(minus(v122, v58)); real2 v576 = plus(v58, v122); real2 v90 = load(in, 88 << shift); real2 v26 = load(in, 24 << shift); real2 v575 = plus(v26, v90); real2 v571 = minus(v90, v26); real2 v839 = plus(v575, v576); real2 v833 = reverse(minus(v576, v575)); real2 v961 = reverse(minus(v839, v838)); real2 v967 = plus(v838, v839); real2 v50 = load(in, 48 << shift); real2 v114 = load(in, 112 << shift); real2 v438 = plus(v50, v114); real2 v432 = reverse(minus(v114, v50)); real2 v82 = load(in, 80 << shift); real2 v18 = load(in, 16 << shift); real2 v433 = minus(v82, v18); real2 v437 = plus(v18, v82); real2 v705 = reverse(minus(v438, v437)); real2 v711 = 
plus(v437, v438); real2 v962 = minus(v711, v710); real2 v966 = plus(v710, v711); real2 v1094 = plus(v966, v967); real2 v1090 = minus(v967, v966); real2 v126 = load(in, 124 << shift); real2 v62 = load(in, 60 << shift); real2 v643 = plus(v62, v126); real2 v637 = reverse(minus(v126, v62)); real2 v30 = load(in, 28 << shift); real2 v94 = load(in, 92 << shift); real2 v638 = minus(v94, v30); real2 v642 = plus(v30, v94); real2 v903 = plus(v642, v643); real2 v897 = reverse(minus(v643, v642)); real2 v14 = load(in, 12 << shift); real2 v78 = load(in, 76 << shift); real2 v361 = minus(v78, v14); real2 v365 = plus(v14, v78); real2 v46 = load(in, 44 << shift); real2 v110 = load(in, 108 << shift); real2 v360 = reverse(minus(v110, v46)); real2 v366 = plus(v46, v110); real2 v898 = minus(v366, v365); real2 v902 = plus(v365, v366); real2 v1031 = plus(v902, v903); real2 v1025 = reverse(minus(v903, v902)); real2 v102 = load(in, 100 << shift); real2 v38 = load(in, 36 << shift); real2 v205 = reverse(minus(v102, v38)); real2 v211 = plus(v38, v102); real2 v70 = load(in, 68 << shift); real2 v6 = load(in, 4 << shift); real2 v210 = plus(v6, v70); real2 v206 = minus(v70, v6); real2 v770 = minus(v211, v210); real2 v774 = plus(v210, v211); real2 v86 = load(in, 84 << shift); real2 v22 = load(in, 20 << shift); real2 v502 = minus(v86, v22); real2 v506 = plus(v22, v86); real2 v118 = load(in, 116 << shift); real2 v54 = load(in, 52 << shift); real2 v501 = reverse(minus(v118, v54)); real2 v507 = plus(v54, v118); real2 v769 = reverse(minus(v507, v506)); real2 v775 = plus(v506, v507); real2 v1030 = plus(v774, v775); real2 v1026 = minus(v775, v774); real2 v1089 = reverse(minus(v1031, v1030)); real2 v1095 = plus(v1030, v1031); real2 v1099 = minus(uplusminus(v1089), v1090); real2 v1103 = minus(uminusplus(v1089), v1090); real2 v1215 = plus(v1099, v1131); store(out, 72 << shift, minus(v1215, v1216)); store(out, 8 << shift, plus(v1215, v1216)); real2 v1211 = minus(v1131, v1099); store(out, 40 << shift, 
minus(uplusminus(v1210), v1211)); store(out, 104 << shift, minus(uminusplus(v1210), v1211)); real2 v1234 = plus(v1103, v1135); real2 v1230 = minus(v1135, v1103); store(out, 120 << shift, minus(uminusplus(v1229), v1230)); store(out, 56 << shift, minus(uplusminus(v1229), v1230)); store(out, 24 << shift, plus(v1234, v1235)); store(out, 88 << shift, minus(v1234, v1235)); real2 v1154 = minus(v1095, v1094); real2 v1158 = plus(v1094, v1095); real2 v1167 = minus(uminusplus(v1153), v1154); real2 v1163 = minus(uplusminus(v1153), v1154); store(out, 16 << shift, plus(v1163, v1179)); store(out, 80 << shift, minus(v1163, v1179)); store(out, 48 << shift, plus(v1167, v1183)); store(out, 112 << shift, minus(v1167, v1183)); real2 v1186 = minus(v1159, v1158); real2 v1190 = plus(v1158, v1159); store(out, 0 << shift, plus(v1190, v1191)); store(out, 64 << shift, minus(v1190, v1191)); store(out, 32 << shift, minus(uplusminus(v1185), v1186)); store(out, 96 << shift, minus(uminusplus(v1185), v1186)); real2 v1027 = minusplus(v1025, v1026); real2 v1029 = minusplus(uminus(v1025), v1026); real2 v971 = minus(uplusminus(v961), v962); real2 v975 = minus(uminusplus(v961), v962); real2 v1039 = ctimesminusplus(reverse(v1029), ctbl[0], ctimes(v1029, ctbl[0])); real2 v1310 = plus(v975, v1039); real2 v1306 = minus(v1039, v975); real2 v1342 = plus(v1310, v1311); real2 v1338 = minus(v1311, v1310); store(out, 12 << shift, plus(v1342, v1343)); store(out, 76 << shift, minus(v1342, v1343)); store(out, 108 << shift, minus(uminusplus(v1337), v1338)); store(out, 44 << shift, minus(uplusminus(v1337), v1338)); real2 v1315 = minus(uplusminus(v1305), v1306); store(out, 28 << shift, plus(v1315, v1331)); store(out, 92 << shift, minus(v1315, v1331)); real2 v1319 = minus(uminusplus(v1305), v1306); store(out, 60 << shift, plus(v1319, v1335)); store(out, 124 << shift, minus(v1319, v1335)); real2 v1035 = ctimesminusplus(reverse(v1027), ctbl[0], ctimes(v1027, ctbl[1])); real2 v1253 = plus(v971, v1035); real2 v1249 = 
minus(v1035, v971); real2 v1281 = minus(v1254, v1253); store(out, 36 << shift, minus(uplusminus(v1280), v1281)); store(out, 100 << shift, minus(uminusplus(v1280), v1281)); real2 v1285 = plus(v1253, v1254); store(out, 68 << shift, minus(v1285, v1286)); store(out, 4 << shift, plus(v1285, v1286)); real2 v1262 = minus(uminusplus(v1248), v1249); store(out, 116 << shift, minus(v1262, v1278)); store(out, 52 << shift, plus(v1262, v1278)); real2 v1258 = minus(uplusminus(v1248), v1249); store(out, 20 << shift, plus(v1258, v1274)); store(out, 84 << shift, minus(v1258, v1274)); real2 v901 = minusplus(uminus(v897), v898); real2 v899 = minusplus(v897, v898); real2 v805 = minusplus(uminus(v801), v802); real2 v803 = minusplus(v801, v802); real2 v811 = ctimesminusplus(reverse(v803), ctbl[8], ctimes(v803, ctbl[9])); real2 v1400 = plus(v811, v939); real2 v1394 = reverse(minus(v939, v811)); real2 v837 = minusplus(uminus(v833), v834); real2 v835 = minusplus(v833, v834); real2 v843 = ctimesminusplus(reverse(v835), ctbl[0], ctimes(v835, ctbl[1])); real2 v773 = minusplus(uminus(v769), v770); real2 v771 = minusplus(v769, v770); real2 v1398 = minusplus(uminus(v1394), v1395); real2 v1396 = minusplus(v1394, v1395); real2 v907 = ctimesminusplus(reverse(v899), ctbl[2], ctimes(v899, ctbl[3])); real2 v715 = minus(uplusminus(v705), v706); real2 v719 = minus(uminusplus(v705), v706); real2 v1363 = minus(v843, v715); real2 v1367 = plus(v715, v843); real2 v1408 = ctimesminusplus(reverse(v1398), ctbl[0], ctimes(v1398, ctbl[0])); real2 v779 = ctimesminusplus(reverse(v771), ctbl[4], ctimes(v771, ctbl[5])); real2 v1362 = reverse(minus(v907, v779)); real2 v1368 = plus(v779, v907); real2 v1376 = minus(uminusplus(v1362), v1363); real2 v1372 = minus(uplusminus(v1362), v1363); real2 v1507 = plus(v1376, v1408); real2 v1503 = minus(v1408, v1376); store(out, 122 << shift, minus(uminusplus(v1502), v1503)); store(out, 58 << shift, minus(uplusminus(v1502), v1503)); store(out, 90 << shift, minus(v1507, v1508)); 
store(out, 26 << shift, plus(v1507, v1508)); real2 v1404 = ctimesminusplus(reverse(v1396), ctbl[0], ctimes(v1396, ctbl[1])); real2 v1484 = minus(v1404, v1372); real2 v1488 = plus(v1372, v1404); store(out, 10 << shift, plus(v1488, v1489)); store(out, 74 << shift, minus(v1488, v1489)); store(out, 106 << shift, minus(uminusplus(v1483), v1484)); store(out, 42 << shift, minus(uplusminus(v1483), v1484)); real2 v1426 = reverse(minus(v1400, v1399)); real2 v1432 = plus(v1399, v1400); real2 v1431 = plus(v1367, v1368); real2 v1427 = minus(v1368, v1367); real2 v1463 = plus(v1431, v1432); store(out, 66 << shift, minus(v1463, v1464)); store(out, 2 << shift, plus(v1463, v1464)); real2 v1459 = minus(v1432, v1431); store(out, 98 << shift, minus(uminusplus(v1458), v1459)); store(out, 34 << shift, minus(uplusminus(v1458), v1459)); real2 v1436 = minus(uplusminus(v1426), v1427); store(out, 82 << shift, minus(v1436, v1452)); store(out, 18 << shift, plus(v1436, v1452)); real2 v1440 = minus(uminusplus(v1426), v1427); store(out, 50 << shift, plus(v1440, v1456)); store(out, 114 << shift, minus(v1440, v1456)); real2 v911 = ctimesminusplus(reverse(v901), ctbl[3], ctimes(v901, ctbl[2])); real2 v783 = ctimesminusplus(reverse(v773), ctbl[2], ctimes(v773, ctbl[3])); real2 v1527 = plus(v783, v911); real2 v1521 = reverse(minus(v911, v783)); real2 v943 = ctimesminusplus(reverse(v933), ctbl[9], ctimes(v933, ctbl[8])); real2 v847 = ctimesminusplus(reverse(v837), ctbl[0], ctimes(v837, ctbl[0])); real2 v1522 = minus(v847, v719); real2 v1526 = plus(v719, v847); real2 v1590 = plus(v1526, v1527); real2 v1586 = minus(v1527, v1526); real2 v815 = ctimesminusplus(reverse(v805), ctbl[6], ctimes(v805, ctbl[12])); real2 v1559 = plus(v815, v943); real2 v1553 = reverse(minus(v943, v815)); real2 v751 = ctimesminusplus(reverse(v741), ctbl[8], ctimes(v741, ctbl[9])); real2 v1558 = plus(v751, v879); real2 v1554 = minus(v879, v751); real2 v1585 = reverse(minus(v1559, v1558)); real2 v1591 = plus(v1558, v1559); real2 
v1618 = minus(v1591, v1590); real2 v1622 = plus(v1590, v1591); store(out, 70 << shift, minus(v1622, v1623)); store(out, 6 << shift, plus(v1622, v1623)); store(out, 102 << shift, minus(uminusplus(v1617), v1618)); store(out, 38 << shift, minus(uplusminus(v1617), v1618)); real2 v1599 = minus(uminusplus(v1585), v1586); real2 v1595 = minus(uplusminus(v1585), v1586); store(out, 22 << shift, plus(v1595, v1611)); store(out, 86 << shift, minus(v1595, v1611)); store(out, 54 << shift, plus(v1599, v1615)); store(out, 118 << shift, minus(v1599, v1615)); real2 v1555 = minusplus(v1553, v1554); real2 v1557 = minusplus(uminus(v1553), v1554); real2 v1531 = minus(uplusminus(v1521), v1522); real2 v1535 = minus(uminusplus(v1521), v1522); real2 v1567 = ctimesminusplus(reverse(v1557), ctbl[0], ctimes(v1557, ctbl[0])); real2 v1666 = plus(v1535, v1567); store(out, 94 << shift, minus(v1666, v1667)); store(out, 30 << shift, plus(v1666, v1667)); real2 v1662 = minus(v1567, v1535); store(out, 126 << shift, minus(uminusplus(v1661), v1662)); store(out, 62 << shift, minus(uplusminus(v1661), v1662)); real2 v1563 = ctimesminusplus(reverse(v1555), ctbl[0], ctimes(v1555, ctbl[1])); real2 v1643 = minus(v1563, v1531); store(out, 46 << shift, minus(uplusminus(v1642), v1643)); store(out, 110 << shift, minus(uminusplus(v1642), v1643)); real2 v1647 = plus(v1531, v1563); store(out, 78 << shift, minus(v1647, v1648)); store(out, 14 << shift, plus(v1647, v1648)); real2 v436 = minusplus(uminus(v432), v433); real2 v434 = minusplus(v432, v433); real2 v145 = minus(uminusplus(v131), v132); real2 v141 = minus(uplusminus(v131), v132); real2 v607 = minusplus(uminus(v603), v604); real2 v605 = minusplus(v603, v604); real2 v615 = ctimesminusplus(reverse(v605), ctbl[22], ctimes(v605, ctbl[23])); real2 v325 = minusplus(uminus(v321), v322); real2 v323 = minusplus(v321, v322); real2 v171 = minusplus(uminus(v167), v168); real2 v169 = minusplus(v167, v168); real2 v179 = ctimesminusplus(reverse(v169), ctbl[28], ctimes(v169, 
ctbl[29])); real2 v333 = ctimesminusplus(reverse(v323), ctbl[24], ctimes(v323, ctbl[25])); real2 v1718 = plus(v333, v615); real2 v1712 = reverse(minus(v615, v333)); real2 v470 = minusplus(v468, v469); real2 v472 = minusplus(uminus(v468), v469); real2 v478 = ctimesminusplus(reverse(v470), ctbl[26], ctimes(v470, ctbl[27])); real2 v1717 = plus(v179, v478); real2 v1713 = minus(v478, v179); real2 v1841 = minus(v1718, v1717); real2 v1845 = plus(v1717, v1718); real2 v674 = minusplus(v672, v673); real2 v676 = minusplus(uminus(v672), v673); real2 v249 = minusplus(uminus(v245), v246); real2 v247 = minusplus(v245, v246); real2 v255 = ctimesminusplus(reverse(v247), ctbl[20], ctimes(v247, ctbl[21])); real2 v398 = minusplus(v396, v397); real2 v400 = minusplus(uminus(v396), v397); real2 v408 = ctimesminusplus(reverse(v398), ctbl[16], ctimes(v398, ctbl[17])); real2 v572 = minusplus(v570, v571); real2 v574 = minusplus(uminus(v570), v571); real2 v625 = minusplus(uminus(v621), v622); real2 v623 = minusplus(v621, v622); real2 v631 = ctimesminusplus(reverse(v623), ctbl[38], ctimes(v623, ctbl[39])); real2 v1728 = reverse(minus(v631, v353)); real2 v1734 = plus(v353, v631); real2 v1857 = minus(v1734, v1733); real2 v1861 = plus(v1733, v1734); real2 v580 = ctimesminusplus(reverse(v572), ctbl[2], ctimes(v572, ctbl[3])); real2 v537 = minusplus(v535, v536); real2 v539 = minusplus(uminus(v535), v536); real2 v546 = ctimesminusplus(reverse(v537), ctbl[18], ctimes(v537, ctbl[19])); real2 v682 = ctimesminusplus(reverse(v674), ctbl[14], ctimes(v674, ctbl[15])); real2 v1776 = reverse(minus(v682, v408)); real2 v1782 = plus(v408, v682); real2 v641 = minusplus(uminus(v637), v638); real2 v639 = minusplus(v637, v638); real2 v287 = minusplus(uminus(v283), v284); real2 v285 = minusplus(v283, v284); real2 v295 = ctimesminusplus(reverse(v285), ctbl[4], ctimes(v285, ctbl[5])); real2 v1680 = reverse(minus(v580, v295)); real2 v1686 = plus(v295, v580); real2 v505 = minusplus(uminus(v501), v502); real2 v503 = 
minusplus(v501, v502); real2 v513 = ctimesminusplus(reverse(v503), ctbl[10], ctimes(v503, ctbl[11])); real2 v444 = ctimesminusplus(reverse(v434), ctbl[0], ctimes(v434, ctbl[1])); real2 v648 = ctimesminusplus(reverse(v639), ctbl[6], ctimes(v639, ctbl[7])); real2 v1685 = plus(v141, v444); real2 v1681 = minus(v444, v141); real2 v1888 = reverse(minus(v1862, v1861)); real2 v1894 = plus(v1861, v1862); real2 v1910 = plus(v1893, v1894); real2 v1904 = reverse(minus(v1894, v1893)); real2 v1813 = plus(v1685, v1686); real2 v1809 = minus(v1686, v1685); real2 v1777 = minus(v546, v255); real2 v1781 = plus(v255, v546); real2 v207 = minusplus(v205, v206); real2 v209 = minusplus(uminus(v205), v206); real2 v1840 = reverse(minus(v1782, v1781)); real2 v1846 = plus(v1781, v1782); real2 v1878 = plus(v1845, v1846); real2 v1872 = reverse(minus(v1846, v1845)); real2 v364 = minusplus(uminus(v360), v361); real2 v362 = minusplus(v360, v361); real2 v370 = ctimesminusplus(reverse(v362), ctbl[8], ctimes(v362, ctbl[9])); real2 v1744 = reverse(minus(v648, v370)); real2 v1750 = plus(v370, v648); real2 v217 = ctimesminusplus(reverse(v207), ctbl[12], ctimes(v207, ctbl[13])); real2 v1749 = plus(v217, v513); real2 v1745 = minus(v513, v217); real2 v1814 = plus(v1749, v1750); real2 v1808 = reverse(minus(v1750, v1749)); real2 v1877 = plus(v1813, v1814); real2 v1873 = minus(v1814, v1813); real2 v1905 = minus(v1878, v1877); real2 v1909 = plus(v1877, v1878); store(out, 33 << shift, minus(uplusminus(v1904), v1905)); store(out, 97 << shift, minus(uminusplus(v1904), v1905)); store(out, 65 << shift, minus(v1909, v1910)); store(out, 1 << shift, plus(v1909, v1910)); real2 v1890 = minusplus(v1888, v1889); real2 v1892 = minusplus(uminus(v1888), v1889); real2 v1902 = ctimesminusplus(reverse(v1892), ctbl[0], ctimes(v1892, ctbl[0])); real2 v1886 = minus(uminusplus(v1872), v1873); store(out, 49 << shift, plus(v1886, v1902)); store(out, 113 << shift, minus(v1886, v1902)); real2 v1882 = minus(uplusminus(v1872), v1873); 
real2 v1898 = ctimesminusplus(reverse(v1890), ctbl[0], ctimes(v1890, ctbl[1])); store(out, 81 << shift, minus(v1882, v1898)); store(out, 17 << shift, plus(v1882, v1898)); real2 v1858 = minusplus(v1856, v1857); real2 v1860 = minusplus(uminus(v1856), v1857); real2 v1870 = ctimesminusplus(reverse(v1860), ctbl[3], ctimes(v1860, ctbl[2])); real2 v1948 = reverse(minus(v1870, v1838)); real2 v1954 = plus(v1838, v1870); real2 v1822 = minus(uminusplus(v1808), v1809); real2 v1818 = minus(uplusminus(v1808), v1809); real2 v1842 = minusplus(v1840, v1841); real2 v1844 = minusplus(uminus(v1840), v1841); real2 v1854 = ctimesminusplus(reverse(v1844), ctbl[0], ctimes(v1844, ctbl[0])); real2 v1949 = minus(v1854, v1822); store(out, 121 << shift, minus(uminusplus(v1948), v1949)); store(out, 57 << shift, minus(uplusminus(v1948), v1949)); real2 v1953 = plus(v1822, v1854); store(out, 89 << shift, minus(v1953, v1954)); store(out, 25 << shift, plus(v1953, v1954)); real2 v1850 = ctimesminusplus(reverse(v1842), ctbl[0], ctimes(v1842, ctbl[1])); real2 v1866 = ctimesminusplus(reverse(v1858), ctbl[2], ctimes(v1858, ctbl[3])); real2 v1929 = reverse(minus(v1866, v1834)); real2 v1935 = plus(v1834, v1866); real2 v1930 = minus(v1850, v1818); store(out, 105 << shift, minus(uminusplus(v1929), v1930)); store(out, 41 << shift, minus(uplusminus(v1929), v1930)); real2 v1934 = plus(v1818, v1850); store(out, 73 << shift, minus(v1934, v1935)); store(out, 9 << shift, plus(v1934, v1935)); real2 v1690 = minus(uplusminus(v1680), v1681); real2 v1694 = minus(uminusplus(v1680), v1681); real2 v1716 = minusplus(uminus(v1712), v1713); real2 v1714 = minusplus(v1712, v1713); real2 v1730 = minusplus(v1728, v1729); real2 v1732 = minusplus(uminus(v1728), v1729); real2 v1742 = ctimesminusplus(reverse(v1732), ctbl[6], ctimes(v1732, ctbl[12])); real2 v1726 = ctimesminusplus(reverse(v1716), ctbl[2], ctimes(v1716, ctbl[3])); real2 v1780 = minusplus(uminus(v1776), v1777); real2 v1778 = minusplus(v1776, v1777); real2 v1790 = 
ctimesminusplus(reverse(v1780), ctbl[3], ctimes(v1780, ctbl[2])); real2 v2030 = plus(v1726, v1790); real2 v2024 = reverse(minus(v1790, v1726)); real2 v1806 = ctimesminusplus(reverse(v1796), ctbl[9], ctimes(v1796, ctbl[8])); real2 v1746 = minusplus(v1744, v1745); real2 v1748 = minusplus(uminus(v1744), v1745); real2 v2040 = reverse(minus(v1806, v1742)); real2 v2046 = plus(v1742, v1806); real2 v2062 = plus(v2045, v2046); real2 v2056 = reverse(minus(v2046, v2045)); real2 v1758 = ctimesminusplus(reverse(v1748), ctbl[0], ctimes(v1748, ctbl[0])); real2 v2025 = minus(v1758, v1694); real2 v2029 = plus(v1694, v1758); real2 v2057 = minus(v2030, v2029); store(out, 109 << shift, minus(uminusplus(v2056), v2057)); store(out, 45 << shift, minus(uplusminus(v2056), v2057)); real2 v2061 = plus(v2029, v2030); store(out, 13 << shift, plus(v2061, v2062)); store(out, 77 << shift, minus(v2061, v2062)); real2 v2044 = minusplus(uminus(v2040), v2041); real2 v2042 = minusplus(v2040, v2041); real2 v2054 = ctimesminusplus(reverse(v2044), ctbl[0], ctimes(v2044, ctbl[0])); real2 v2038 = minus(uminusplus(v2024), v2025); real2 v2034 = minus(uplusminus(v2024), v2025); store(out, 125 << shift, minus(v2038, v2054)); store(out, 61 << shift, plus(v2038, v2054)); real2 v2050 = ctimesminusplus(reverse(v2042), ctbl[0], ctimes(v2042, ctbl[1])); store(out, 29 << shift, plus(v2034, v2050)); store(out, 93 << shift, minus(v2034, v2050)); real2 v1738 = ctimesminusplus(reverse(v1730), ctbl[8], ctimes(v1730, ctbl[9])); real2 v1802 = ctimesminusplus(reverse(v1794), ctbl[6], ctimes(v1794, ctbl[7])); real2 v1989 = plus(v1738, v1802); real2 v1983 = reverse(minus(v1802, v1738)); real2 v1999 = reverse(minus(v1989, v1988)); real2 v2005 = plus(v1988, v1989); real2 v1722 = ctimesminusplus(reverse(v1714), ctbl[4], ctimes(v1714, ctbl[5])); real2 v1786 = ctimesminusplus(reverse(v1778), ctbl[2], ctimes(v1778, ctbl[3])); real2 v1973 = plus(v1722, v1786); real2 v1967 = reverse(minus(v1786, v1722)); real2 v1754 = 
ctimesminusplus(reverse(v1746), ctbl[0], ctimes(v1746, ctbl[1])); real2 v1972 = plus(v1690, v1754); real2 v1968 = minus(v1754, v1690); real2 v2004 = plus(v1972, v1973); store(out, 5 << shift, plus(v2004, v2005)); store(out, 69 << shift, minus(v2004, v2005)); real2 v2000 = minus(v1973, v1972); store(out, 37 << shift, minus(uplusminus(v1999), v2000)); store(out, 101 << shift, minus(uminusplus(v1999), v2000)); real2 v1985 = minusplus(v1983, v1984); real2 v1987 = minusplus(uminus(v1983), v1984); real2 v1993 = ctimesminusplus(reverse(v1985), ctbl[0], ctimes(v1985, ctbl[1])); real2 v1977 = minus(uplusminus(v1967), v1968); store(out, 21 << shift, plus(v1977, v1993)); store(out, 85 << shift, minus(v1977, v1993)); real2 v1981 = minus(uminusplus(v1967), v1968); real2 v1997 = ctimesminusplus(reverse(v1987), ctbl[0], ctimes(v1987, ctbl[0])); store(out, 117 << shift, minus(v1981, v1997)); store(out, 53 << shift, plus(v1981, v1997)); real2 v703 = ctimesminusplus(reverse(v692), ctbl[45], ctimes(v692, ctbl[44])); real2 v261 = ctimesminusplus(reverse(v249), ctbl[26], ctimes(v249, ctbl[27])); real2 v550 = ctimesminusplus(reverse(v539), ctbl[15], ctimes(v539, ctbl[14])); real2 v413 = ctimesminusplus(reverse(v400), ctbl[18], ctimes(v400, ctbl[24])); real2 v2178 = minus(v550, v261); real2 v2182 = plus(v261, v550); real2 v686 = ctimesminusplus(reverse(v676), ctbl[21], ctimes(v676, ctbl[20])); real2 v2183 = plus(v413, v686); real2 v2177 = reverse(minus(v686, v413)); real2 v203 = ctimesminusplus(reverse(v191), ctbl[56], ctimes(v191, ctbl[57])); real2 v281 = ctimesminusplus(reverse(v267), ctbl[50], ctimes(v267, ctbl[51])); real2 v2247 = plus(v2182, v2183); real2 v2241 = reverse(minus(v2183, v2182)); real2 v619 = ctimesminusplus(reverse(v607), ctbl[27], ctimes(v607, ctbl[26])); real2 v358 = ctimesminusplus(reverse(v345), ctbl[30], ctimes(v345, ctbl[60])); real2 v499 = ctimesminusplus(reverse(v488), ctbl[36], ctimes(v488, ctbl[54])); real2 v2130 = minus(v499, v203); real2 v2134 = plus(v203, 
v499); real2 v185 = ctimesminusplus(reverse(v171), ctbl[20], ctimes(v171, ctbl[21])); real2 v339 = ctimesminusplus(reverse(v325), ctbl[14], ctimes(v325, ctbl[15])); real2 v2119 = plus(v339, v619); real2 v2113 = reverse(minus(v619, v339)); real2 v482 = ctimesminusplus(reverse(v472), ctbl[24], ctimes(v472, ctbl[18])); real2 v2114 = minus(v482, v185); real2 v2118 = plus(v185, v482); real2 v2246 = plus(v2118, v2119); real2 v2242 = minus(v2119, v2118); real2 v584 = ctimesminusplus(reverse(v574), ctbl[3], ctimes(v574, ctbl[2])); real2 v652 = ctimesminusplus(reverse(v641), ctbl[9], ctimes(v641, ctbl[8])); real2 v568 = ctimesminusplus(reverse(v556), ctbl[39], ctimes(v556, ctbl[38])); real2 v2194 = minus(v568, v281); real2 v2198 = plus(v281, v568); real2 v517 = ctimesminusplus(reverse(v505), ctbl[12], ctimes(v505, ctbl[6])); real2 v430 = ctimesminusplus(reverse(v419), ctbl[42], ctimes(v419, ctbl[48])); real2 v2193 = reverse(minus(v703, v430)); real2 v2199 = plus(v430, v703); real2 v2273 = reverse(minus(v2247, v2246)); real2 v2279 = plus(v2246, v2247); real2 v375 = ctimesminusplus(reverse(v364), ctbl[6], ctimes(v364, ctbl[12])); real2 v2145 = reverse(minus(v652, v375)); real2 v2151 = plus(v375, v652); real2 v2263 = plus(v2198, v2199); real2 v2257 = reverse(minus(v2199, v2198)); real2 v448 = ctimesminusplus(reverse(v436), ctbl[0], ctimes(v436, ctbl[0])); real2 v635 = ctimesminusplus(reverse(v625), ctbl[33], ctimes(v625, ctbl[32])); real2 v2129 = reverse(minus(v635, v358)); real2 v2135 = plus(v358, v635); real2 v2258 = minus(v2135, v2134); real2 v2262 = plus(v2134, v2135); real2 v2086 = plus(v145, v448); real2 v2082 = minus(v448, v145); real2 v301 = ctimesminusplus(reverse(v287), ctbl[2], ctimes(v287, ctbl[3])); real2 v223 = ctimesminusplus(reverse(v209), ctbl[8], ctimes(v209, ctbl[9])); real2 v2150 = plus(v223, v517); real2 v2146 = minus(v517, v223); real2 v2081 = reverse(minus(v584, v301)); real2 v2087 = plus(v301, v584); real2 v2210 = minus(v2087, v2086); real2 v2214 = 
plus(v2086, v2087); real2 v2215 = plus(v2150, v2151); real2 v2209 = reverse(minus(v2151, v2150)); real2 v2289 = reverse(minus(v2263, v2262)); real2 v2295 = plus(v2262, v2263); real2 v2311 = plus(v2294, v2295); real2 v2305 = reverse(minus(v2295, v2294)); real2 v2274 = minus(v2215, v2214); real2 v2278 = plus(v2214, v2215); real2 v2306 = minus(v2279, v2278); real2 v2310 = plus(v2278, v2279); store(out, 35 << shift, minus(uplusminus(v2305), v2306)); store(out, 99 << shift, minus(uminusplus(v2305), v2306)); store(out, 3 << shift, plus(v2310, v2311)); store(out, 67 << shift, minus(v2310, v2311)); real2 v2293 = minusplus(uminus(v2289), v2290); real2 v2291 = minusplus(v2289, v2290); real2 v2303 = ctimesminusplus(reverse(v2293), ctbl[0], ctimes(v2293, ctbl[0])); real2 v2287 = minus(uminusplus(v2273), v2274); store(out, 51 << shift, plus(v2287, v2303)); store(out, 115 << shift, minus(v2287, v2303)); real2 v2283 = minus(uplusminus(v2273), v2274); real2 v2299 = ctimesminusplus(reverse(v2291), ctbl[0], ctimes(v2291, ctbl[1])); store(out, 19 << shift, plus(v2283, v2299)); store(out, 83 << shift, minus(v2283, v2299)); real2 v2261 = minusplus(uminus(v2257), v2258); real2 v2259 = minusplus(v2257, v2258); real2 v2243 = minusplus(v2241, v2242); real2 v2245 = minusplus(uminus(v2241), v2242); real2 v2251 = ctimesminusplus(reverse(v2243), ctbl[0], ctimes(v2243, ctbl[1])); real2 v2267 = ctimesminusplus(reverse(v2259), ctbl[2], ctimes(v2259, ctbl[3])); real2 v2229 = minusplus(uminus(v2225), v2226); real2 v2227 = minusplus(v2225, v2226); real2 v2235 = ctimesminusplus(reverse(v2227), ctbl[4], ctimes(v2227, ctbl[5])); real2 v2330 = reverse(minus(v2267, v2235)); real2 v2336 = plus(v2235, v2267); real2 v2219 = minus(uplusminus(v2209), v2210); real2 v2223 = minus(uminusplus(v2209), v2210); real2 v2335 = plus(v2219, v2251); real2 v2331 = minus(v2251, v2219); store(out, 43 << shift, minus(uplusminus(v2330), v2331)); store(out, 107 << shift, minus(uminusplus(v2330), v2331)); store(out, 75 << 
shift, minus(v2335, v2336)); store(out, 11 << shift, plus(v2335, v2336)); real2 v2239 = ctimesminusplus(reverse(v2229), ctbl[2], ctimes(v2229, ctbl[3])); real2 v2271 = ctimesminusplus(reverse(v2261), ctbl[3], ctimes(v2261, ctbl[2])); real2 v2349 = reverse(minus(v2271, v2239)); real2 v2355 = plus(v2239, v2271); real2 v2255 = ctimesminusplus(reverse(v2245), ctbl[0], ctimes(v2245, ctbl[0])); real2 v2350 = minus(v2255, v2223); store(out, 59 << shift, minus(uplusminus(v2349), v2350)); store(out, 123 << shift, minus(uminusplus(v2349), v2350)); real2 v2354 = plus(v2223, v2255); store(out, 91 << shift, minus(v2354, v2355)); store(out, 27 << shift, plus(v2354, v2355)); real2 v2165 = minusplus(uminus(v2161), v2162); real2 v2163 = minusplus(v2161, v2162); real2 v2133 = minusplus(uminus(v2129), v2130); real2 v2131 = minusplus(v2129, v2130); real2 v2195 = minusplus(v2193, v2194); real2 v2197 = minusplus(uminus(v2193), v2194); real2 v2203 = ctimesminusplus(reverse(v2195), ctbl[6], ctimes(v2195, ctbl[7])); real2 v2171 = ctimesminusplus(reverse(v2163), ctbl[10], ctimes(v2163, ctbl[11])); real2 v2385 = minus(v2171, v2107); real2 v2389 = plus(v2107, v2171); real2 v2139 = ctimesminusplus(reverse(v2131), ctbl[8], ctimes(v2131, ctbl[9])); real2 v2390 = plus(v2139, v2203); real2 v2384 = reverse(minus(v2203, v2139)); real2 v2406 = plus(v2389, v2390); real2 v2400 = reverse(minus(v2390, v2389)); real2 v2181 = minusplus(uminus(v2177), v2178); real2 v2179 = minusplus(v2177, v2178); real2 v2091 = minus(uplusminus(v2081), v2082); real2 v2095 = minus(uminusplus(v2081), v2082); real2 v2117 = minusplus(uminus(v2113), v2114); real2 v2115 = minusplus(v2113, v2114); real2 v2123 = ctimesminusplus(reverse(v2115), ctbl[4], ctimes(v2115, ctbl[5])); real2 v2187 = ctimesminusplus(reverse(v2179), ctbl[2], ctimes(v2179, ctbl[3])); real2 v2374 = plus(v2123, v2187); real2 v2368 = reverse(minus(v2187, v2123)); real2 v2149 = minusplus(uminus(v2145), v2146); real2 v2147 = minusplus(v2145, v2146); real2 v2155 = 
ctimesminusplus(reverse(v2147), ctbl[0], ctimes(v2147, ctbl[1])); real2 v2373 = plus(v2091, v2155); real2 v2369 = minus(v2155, v2091); real2 v2401 = minus(v2374, v2373); real2 v2405 = plus(v2373, v2374); store(out, 71 << shift, minus(v2405, v2406)); store(out, 7 << shift, plus(v2405, v2406)); store(out, 103 << shift, minus(uminusplus(v2400), v2401)); store(out, 39 << shift, minus(uplusminus(v2400), v2401)); real2 v2388 = minusplus(uminus(v2384), v2385); real2 v2386 = minusplus(v2384, v2385); real2 v2398 = ctimesminusplus(reverse(v2388), ctbl[0], ctimes(v2388, ctbl[0])); real2 v2378 = minus(uplusminus(v2368), v2369); real2 v2382 = minus(uminusplus(v2368), v2369); store(out, 55 << shift, plus(v2382, v2398)); store(out, 119 << shift, minus(v2382, v2398)); real2 v2394 = ctimesminusplus(reverse(v2386), ctbl[0], ctimes(v2386, ctbl[1])); store(out, 23 << shift, plus(v2378, v2394)); store(out, 87 << shift, minus(v2378, v2394)); real2 v2207 = ctimesminusplus(reverse(v2197), ctbl[9], ctimes(v2197, ctbl[8])); real2 v2111 = ctimesminusplus(reverse(v2101), ctbl[8], ctimes(v2101, ctbl[9])); real2 v2175 = ctimesminusplus(reverse(v2165), ctbl[12], ctimes(v2165, ctbl[6])); real2 v2446 = plus(v2111, v2175); real2 v2442 = minus(v2175, v2111); real2 v2143 = ctimesminusplus(reverse(v2133), ctbl[6], ctimes(v2133, ctbl[12])); real2 v2441 = reverse(minus(v2207, v2143)); real2 v2447 = plus(v2143, v2207); real2 v2443 = minusplus(v2441, v2442); real2 v2445 = minusplus(uminus(v2441), v2442); real2 v2159 = ctimesminusplus(reverse(v2149), ctbl[0], ctimes(v2149, ctbl[0])); real2 v2455 = ctimesminusplus(reverse(v2445), ctbl[0], ctimes(v2445, ctbl[0])); real2 v2127 = ctimesminusplus(reverse(v2117), ctbl[2], ctimes(v2117, ctbl[3])); real2 v2191 = ctimesminusplus(reverse(v2181), ctbl[3], ctimes(v2181, ctbl[2])); real2 v2431 = plus(v2127, v2191); real2 v2425 = reverse(minus(v2191, v2127)); real2 v2426 = minus(v2159, v2095); real2 v2430 = plus(v2095, v2159); real2 v2439 = minus(uminusplus(v2425), 
v2426); store(out, 127 << shift, minus(v2439, v2455)); store(out, 63 << shift, plus(v2439, v2455)); real2 v2435 = minus(uplusminus(v2425), v2426); real2 v2451 = ctimesminusplus(reverse(v2443), ctbl[0], ctimes(v2443, ctbl[1])); store(out, 95 << shift, minus(v2435, v2451)); store(out, 31 << shift, plus(v2435, v2451)); real2 v2463 = plus(v2446, v2447); real2 v2457 = reverse(minus(v2447, v2446)); real2 v2458 = minus(v2431, v2430); store(out, 47 << shift, minus(uplusminus(v2457), v2458)); store(out, 111 << shift, minus(uminusplus(v2457), v2458)); real2 v2462 = plus(v2430, v2431); store(out, 79 << shift, minus(v2462, v2463)); store(out, 15 << shift, plus(v2462, v2463)); // Pres : 68088 } } ALIGNED(8192) void but128f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); // Pres : 148586 real2 v56 = load(in, 54 << inShift); real2 v120 = load(in, 118 << inShift); real2 v571 = reverse(minus(v56, v120)); real2 v577 = plus(v56, v120); real2 v24 = load(in, 22 << inShift); real2 v88 = load(in, 86 << inShift); real2 v576 = plus(v24, v88); real2 v572 = minus(v88, v24); real2 v573 = minusplus(v571, v572); real2 v575 = minusplus(uminus(v571), v572); real2 v589 = ctimesminusplus(reverse(v575), tbl[92 + tbloffset], ctimes(v575, tbl[93 + tbloffset])); real2 v583 = ctimesminusplus(reverse(v573), tbl[90 + tbloffset], ctimes(v573, tbl[91 + tbloffset])); real2 v897 = plus(v576, v577); real2 v891 = reverse(minus(v576, v577)); real2 v8 = load(in, 6 << inShift); real2 v72 = load(in, 70 << inShift); real2 v252 = minus(v72, v8); real2 v256 = plus(v8, v72); real2 v104 = load(in, 102 << inShift); real2 v40 = load(in, 38 << inShift); real2 v251 = reverse(minus(v40, v104)); real2 
v257 = plus(v40, v104); real2 v255 = minusplus(uminus(v251), v252); real2 v253 = minusplus(v251, v252); real2 v263 = ctimesminusplus(reverse(v253), tbl[26 + tbloffset], ctimes(v253, tbl[27 + tbloffset])); real2 v896 = plus(v256, v257); real2 v892 = minus(v257, v256); real2 v895 = minusplus(uminus(v891), v892); real2 v893 = minusplus(v891, v892); real2 v909 = ctimesminusplus(reverse(v895), tbl[156 + tbloffset], ctimes(v895, tbl[157 + tbloffset])); real2 v903 = ctimesminusplus(reverse(v893), tbl[154 + tbloffset], ctimes(v893, tbl[155 + tbloffset])); real2 v269 = ctimesminusplus(reverse(v255), tbl[28 + tbloffset], ctimes(v255, tbl[29 + tbloffset])); real2 v1216 = plus(v896, v897); real2 v1212 = minus(v897, v896); real2 v2160 = minus(v583, v263); real2 v2164 = plus(v263, v583); real2 v2686 = minus(v589, v269); real2 v2690 = plus(v269, v589); real2 v96 = load(in, 94 << inShift); real2 v32 = load(in, 30 << inShift); real2 v736 = plus(v32, v96); real2 v732 = minus(v96, v32); real2 v64 = load(in, 62 << inShift); real2 v128 = load(in, 126 << inShift); real2 v737 = plus(v64, v128); real2 v731 = reverse(minus(v64, v128)); real2 v1057 = plus(v736, v737); real2 v1051 = reverse(minus(v736, v737)); real2 v733 = minusplus(v731, v732); real2 v735 = minusplus(uminus(v731), v732); real2 v749 = ctimesminusplus(reverse(v735), tbl[124 + tbloffset], ctimes(v735, tbl[125 + tbloffset])); real2 v743 = ctimesminusplus(reverse(v733), tbl[122 + tbloffset], ctimes(v733, tbl[123 + tbloffset])); real2 v16 = load(in, 14 << inShift); real2 v80 = load(in, 78 << inShift); real2 v412 = minus(v80, v16); real2 v416 = plus(v16, v80); real2 v112 = load(in, 110 << inShift); real2 v48 = load(in, 46 << inShift); real2 v417 = plus(v48, v112); real2 v411 = reverse(minus(v48, v112)); real2 v1056 = plus(v416, v417); real2 v1052 = minus(v417, v416); real2 v1055 = minusplus(uminus(v1051), v1052); real2 v1053 = minusplus(v1051, v1052); real2 v1063 = ctimesminusplus(reverse(v1053), tbl[186 + tbloffset], 
ctimes(v1053, tbl[187 + tbloffset])); real2 v1665 = plus(v903, v1063); real2 v1659 = reverse(minus(v903, v1063)); real2 v1069 = ctimesminusplus(reverse(v1055), tbl[188 + tbloffset], ctimes(v1055, tbl[189 + tbloffset])); real2 v1869 = reverse(minus(v909, v1069)); real2 v1875 = plus(v909, v1069); real2 v413 = minusplus(v411, v412); real2 v415 = minusplus(uminus(v411), v412); real2 v429 = ctimesminusplus(reverse(v415), tbl[60 + tbloffset], ctimes(v415, tbl[61 + tbloffset])); real2 v1217 = plus(v1056, v1057); real2 v1211 = reverse(minus(v1056, v1057)); real2 v1297 = plus(v1216, v1217); real2 v1291 = reverse(minus(v1216, v1217)); real2 v2691 = plus(v429, v749); real2 v2685 = reverse(minus(v429, v749)); real2 v2765 = reverse(minus(v2690, v2691)); real2 v2771 = plus(v2690, v2691); real2 v2689 = minusplus(uminus(v2685), v2686); real2 v2687 = minusplus(v2685, v2686); real2 v2703 = ctimesminusplus(reverse(v2689), tbl[476 + tbloffset], ctimes(v2689, tbl[477 + tbloffset])); real2 v2697 = ctimesminusplus(reverse(v2687), tbl[474 + tbloffset], ctimes(v2687, tbl[475 + tbloffset])); real2 v1215 = minusplus(uminus(v1211), v1212); real2 v1213 = minusplus(v1211, v1212); real2 v1223 = ctimesminusplus(reverse(v1213), tbl[218 + tbloffset], ctimes(v1213, tbl[219 + tbloffset])); real2 v1229 = ctimesminusplus(reverse(v1215), tbl[220 + tbloffset], ctimes(v1215, tbl[221 + tbloffset])); real2 v423 = ctimesminusplus(reverse(v413), tbl[58 + tbloffset], ctimes(v413, tbl[59 + tbloffset])); real2 v2165 = plus(v423, v743); real2 v2159 = reverse(minus(v423, v743)); real2 v2245 = plus(v2164, v2165); real2 v2239 = reverse(minus(v2164, v2165)); real2 v44 = load(in, 42 << inShift); real2 v108 = load(in, 106 << inShift); real2 v331 = reverse(minus(v44, v108)); real2 v337 = plus(v44, v108); real2 v76 = load(in, 74 << inShift); real2 v12 = load(in, 10 << inShift); real2 v336 = plus(v12, v76); real2 v332 = minus(v76, v12); real2 v976 = plus(v336, v337); real2 v972 = minus(v337, v336); real2 v335 = 
minusplus(uminus(v331), v332); real2 v333 = minusplus(v331, v332); real2 v343 = ctimesminusplus(reverse(v333), tbl[42 + tbloffset], ctimes(v333, tbl[43 + tbloffset])); real2 v349 = ctimesminusplus(reverse(v335), tbl[44 + tbloffset], ctimes(v335, tbl[45 + tbloffset])); real2 v124 = load(in, 122 << inShift); real2 v60 = load(in, 58 << inShift); real2 v651 = reverse(minus(v60, v124)); real2 v657 = plus(v60, v124); real2 v28 = load(in, 26 << inShift); real2 v92 = load(in, 90 << inShift); real2 v652 = minus(v92, v28); real2 v656 = plus(v28, v92); real2 v977 = plus(v656, v657); real2 v971 = reverse(minus(v656, v657)); real2 v973 = minusplus(v971, v972); real2 v975 = minusplus(uminus(v971), v972); real2 v983 = ctimesminusplus(reverse(v973), tbl[170 + tbloffset], ctimes(v973, tbl[171 + tbloffset])); real2 v1131 = reverse(minus(v976, v977)); real2 v1137 = plus(v976, v977); real2 v655 = minusplus(uminus(v651), v652); real2 v653 = minusplus(v651, v652); real2 v669 = ctimesminusplus(reverse(v655), tbl[108 + tbloffset], ctimes(v655, tbl[109 + tbloffset])); real2 v663 = ctimesminusplus(reverse(v653), tbl[106 + tbloffset], ctimes(v653, tbl[107 + tbloffset])); real2 v2079 = reverse(minus(v343, v663)); real2 v2085 = plus(v343, v663); real2 v2605 = reverse(minus(v349, v669)); real2 v2611 = plus(v349, v669); real2 v989 = ctimesminusplus(reverse(v975), tbl[172 + tbloffset], ctimes(v975, tbl[173 + tbloffset])); real2 v20 = load(in, 18 << inShift); real2 v84 = load(in, 82 << inShift); real2 v496 = plus(v20, v84); real2 v492 = minus(v84, v20); real2 v52 = load(in, 50 << inShift); real2 v116 = load(in, 114 << inShift); real2 v491 = reverse(minus(v52, v116)); real2 v497 = plus(v52, v116); real2 v817 = plus(v496, v497); real2 v811 = reverse(minus(v496, v497)); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v509 = ctimesminusplus(reverse(v495), tbl[76 + tbloffset], ctimes(v495, tbl[77 + tbloffset])); real2 v503 = ctimesminusplus(reverse(v493), tbl[74 + 
tbloffset], ctimes(v493, tbl[75 + tbloffset])); real2 v36 = load(in, 34 << inShift); real2 v100 = load(in, 98 << inShift); real2 v171 = reverse(minus(v36, v100)); real2 v177 = plus(v36, v100); real2 v68 = load(in, 66 << inShift); real2 v4 = load(in, 2 << inShift); real2 v176 = plus(v4, v68); real2 v172 = minus(v68, v4); real2 v816 = plus(v176, v177); real2 v812 = minus(v177, v176); real2 v1136 = plus(v816, v817); real2 v1132 = minus(v817, v816); real2 v1133 = minusplus(v1131, v1132); real2 v1135 = minusplus(uminus(v1131), v1132); real2 v1149 = ctimesminusplus(reverse(v1135), tbl[204 + tbloffset], ctimes(v1135, tbl[205 + tbloffset])); real2 v1296 = plus(v1136, v1137); real2 v1292 = minus(v1137, v1136); real2 v1295 = minusplus(uminus(v1291), v1292); real2 v1293 = minusplus(v1291, v1292); real2 v1303 = ctimesminusplus(reverse(v1293), tbl[234 + tbloffset], ctimes(v1293, tbl[235 + tbloffset])); real2 v1331 = reverse(minus(v1296, v1297)); real2 v1337 = plus(v1296, v1297); real2 v173 = minusplus(v171, v172); real2 v175 = minusplus(uminus(v171), v172); real2 v189 = ctimesminusplus(reverse(v175), tbl[12 + tbloffset], ctimes(v175, tbl[13 + tbloffset])); real2 v1309 = ctimesminusplus(reverse(v1295), tbl[236 + tbloffset], ctimes(v1295, tbl[237 + tbloffset])); real2 v815 = minusplus(uminus(v811), v812); real2 v813 = minusplus(v811, v812); real2 v1143 = ctimesminusplus(reverse(v1133), tbl[202 + tbloffset], ctimes(v1133, tbl[203 + tbloffset])); real2 v1541 = reverse(minus(v1149, v1229)); real2 v1547 = plus(v1149, v1229); real2 v2610 = plus(v189, v509); real2 v2606 = minus(v509, v189); real2 v2770 = plus(v2610, v2611); real2 v2766 = minus(v2611, v2610); real2 v823 = ctimesminusplus(reverse(v813), tbl[138 + tbloffset], ctimes(v813, tbl[139 + tbloffset])); real2 v829 = ctimesminusplus(reverse(v815), tbl[140 + tbloffset], ctimes(v815, tbl[141 + tbloffset])); real2 v2811 = plus(v2770, v2771); real2 v2805 = reverse(minus(v2770, v2771)); real2 v2767 = minusplus(v2765, v2766); real2 
v2769 = minusplus(uminus(v2765), v2766); real2 v2607 = minusplus(v2605, v2606); real2 v2609 = minusplus(uminus(v2605), v2606); real2 v2617 = ctimesminusplus(reverse(v2607), tbl[458 + tbloffset], ctimes(v2607, tbl[459 + tbloffset])); real2 v2623 = ctimesminusplus(reverse(v2609), tbl[460 + tbloffset], ctimes(v2609, tbl[461 + tbloffset])); real2 v3013 = reverse(minus(v2623, v2703)); real2 v3019 = plus(v2623, v2703); real2 v2783 = ctimesminusplus(reverse(v2769), tbl[492 + tbloffset], ctimes(v2769, tbl[493 + tbloffset])); real2 v2941 = plus(v2617, v2697); real2 v2935 = reverse(minus(v2617, v2697)); real2 v2777 = ctimesminusplus(reverse(v2767), tbl[490 + tbloffset], ctimes(v2767, tbl[491 + tbloffset])); real2 v1660 = minus(v983, v823); real2 v1664 = plus(v823, v983); real2 v1874 = plus(v829, v989); real2 v1870 = minus(v989, v829); real2 v1909 = reverse(minus(v1874, v1875)); real2 v1915 = plus(v1874, v1875); real2 v1663 = minusplus(uminus(v1659), v1660); real2 v1661 = minusplus(v1659, v1660); real2 v1677 = ctimesminusplus(reverse(v1663), tbl[296 + tbloffset], ctimes(v1663, tbl[297 + tbloffset])); real2 v1873 = minusplus(uminus(v1869), v1870); real2 v1871 = minusplus(v1869, v1870); real2 v1887 = ctimesminusplus(reverse(v1873), tbl[332 + tbloffset], ctimes(v1873, tbl[333 + tbloffset])); real2 v1705 = plus(v1664, v1665); real2 v1699 = reverse(minus(v1664, v1665)); real2 v1671 = ctimesminusplus(reverse(v1661), tbl[294 + tbloffset], ctimes(v1661, tbl[295 + tbloffset])); real2 v1881 = ctimesminusplus(reverse(v1871), tbl[330 + tbloffset], ctimes(v1871, tbl[331 + tbloffset])); real2 v1469 = plus(v1143, v1223); real2 v1463 = reverse(minus(v1143, v1223)); real2 v54 = load(in, 52 << inShift); real2 v118 = load(in, 116 << inShift); real2 v537 = plus(v54, v118); real2 v531 = reverse(minus(v54, v118)); real2 v86 = load(in, 84 << inShift); real2 v22 = load(in, 20 << inShift); real2 v536 = plus(v22, v86); real2 v532 = minus(v86, v22); real2 v851 = reverse(minus(v536, v537)); real2 v857 = 
plus(v536, v537); real2 v533 = minusplus(v531, v532); real2 v535 = minusplus(uminus(v531), v532); real2 v549 = ctimesminusplus(reverse(v535), tbl[84 + tbloffset], ctimes(v535, tbl[85 + tbloffset])); real2 v102 = load(in, 100 << inShift); real2 v38 = load(in, 36 << inShift); real2 v217 = plus(v38, v102); real2 v211 = reverse(minus(v38, v102)); real2 v70 = load(in, 68 << inShift); real2 v6 = load(in, 4 << inShift); real2 v216 = plus(v6, v70); real2 v212 = minus(v70, v6); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v229 = ctimesminusplus(reverse(v215), tbl[20 + tbloffset], ctimes(v215, tbl[21 + tbloffset])); real2 v2646 = minus(v549, v229); real2 v2650 = plus(v229, v549); real2 v856 = plus(v216, v217); real2 v852 = minus(v217, v216); real2 v853 = minusplus(v851, v852); real2 v855 = minusplus(uminus(v851), v852); real2 v863 = ctimesminusplus(reverse(v853), tbl[146 + tbloffset], ctimes(v853, tbl[147 + tbloffset])); real2 v869 = ctimesminusplus(reverse(v855), tbl[148 + tbloffset], ctimes(v855, tbl[149 + tbloffset])); real2 v1176 = plus(v856, v857); real2 v1172 = minus(v857, v856); real2 v110 = load(in, 108 << inShift); real2 v46 = load(in, 44 << inShift); real2 v377 = plus(v46, v110); real2 v371 = reverse(minus(v46, v110)); real2 v78 = load(in, 76 << inShift); real2 v14 = load(in, 12 << inShift); real2 v372 = minus(v78, v14); real2 v376 = plus(v14, v78); real2 v1012 = minus(v377, v376); real2 v1016 = plus(v376, v377); real2 v373 = minusplus(v371, v372); real2 v375 = minusplus(uminus(v371), v372); real2 v389 = ctimesminusplus(reverse(v375), tbl[52 + tbloffset], ctimes(v375, tbl[53 + tbloffset])); real2 v30 = load(in, 28 << inShift); real2 v94 = load(in, 92 << inShift); real2 v696 = plus(v30, v94); real2 v692 = minus(v94, v30); real2 v62 = load(in, 60 << inShift); real2 v126 = load(in, 124 << inShift); real2 v697 = plus(v62, v126); real2 v691 = reverse(minus(v62, v126)); real2 v1017 = plus(v696, v697); real2 v1011 = 
reverse(minus(v696, v697)); real2 v1171 = reverse(minus(v1016, v1017)); real2 v1177 = plus(v1016, v1017); real2 v1013 = minusplus(v1011, v1012); real2 v1015 = minusplus(uminus(v1011), v1012); real2 v1175 = minusplus(uminus(v1171), v1172); real2 v1173 = minusplus(v1171, v1172); real2 v1183 = ctimesminusplus(reverse(v1173), tbl[210 + tbloffset], ctimes(v1173, tbl[211 + tbloffset])); real2 v1189 = ctimesminusplus(reverse(v1175), tbl[212 + tbloffset], ctimes(v1175, tbl[213 + tbloffset])); real2 v1029 = ctimesminusplus(reverse(v1015), tbl[180 + tbloffset], ctimes(v1015, tbl[181 + tbloffset])); real2 v1023 = ctimesminusplus(reverse(v1013), tbl[178 + tbloffset], ctimes(v1013, tbl[179 + tbloffset])); real2 v1625 = plus(v863, v1023); real2 v1619 = reverse(minus(v863, v1023)); real2 v1835 = plus(v869, v1029); real2 v1829 = reverse(minus(v869, v1029)); real2 v693 = minusplus(v691, v692); real2 v695 = minusplus(uminus(v691), v692); real2 v709 = ctimesminusplus(reverse(v695), tbl[116 + tbloffset], ctimes(v695, tbl[117 + tbloffset])); real2 v2645 = reverse(minus(v389, v709)); real2 v2651 = plus(v389, v709); real2 v1257 = plus(v1176, v1177); real2 v1251 = reverse(minus(v1176, v1177)); real2 v2731 = plus(v2650, v2651); real2 v2725 = reverse(minus(v2650, v2651)); real2 v114 = load(in, 112 << inShift); real2 v50 = load(in, 48 << inShift); real2 v457 = plus(v50, v114); real2 v451 = reverse(minus(v50, v114)); real2 v18 = load(in, 16 << inShift); real2 v82 = load(in, 80 << inShift); real2 v456 = plus(v18, v82); real2 v452 = minus(v82, v18); real2 v771 = reverse(minus(v456, v457)); real2 v777 = plus(v456, v457); real2 v453 = minusplus(v451, v452); real2 v455 = minusplus(uminus(v451), v452); real2 v469 = ctimesminusplus(reverse(v455), tbl[68 + tbloffset], ctimes(v455, tbl[69 + tbloffset])); real2 v66 = load(in, 64 << inShift); real2 v2 = load(in, 0 << inShift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << inShift); real2 v34 = load(in, 32 << 
inShift); real2 v131 = reverse(minus(v34, v98)); real2 v137 = plus(v34, v98); real2 v133 = minusplus(v131, v132); real2 v135 = minusplus(uminus(v131), v132); real2 v149 = ctimesminusplus(reverse(v135), tbl[4 + tbloffset], ctimes(v135, tbl[5 + tbloffset])); real2 v2566 = minus(v469, v149); real2 v2570 = plus(v149, v469); real2 v772 = minus(v137, v136); real2 v776 = plus(v136, v137); real2 v1092 = minus(v777, v776); real2 v1096 = plus(v776, v777); real2 v773 = minusplus(v771, v772); real2 v775 = minusplus(uminus(v771), v772); real2 v783 = ctimesminusplus(reverse(v773), tbl[130 + tbloffset], ctimes(v773, tbl[131 + tbloffset])); real2 v789 = ctimesminusplus(reverse(v775), tbl[132 + tbloffset], ctimes(v775, tbl[133 + tbloffset])); real2 v74 = load(in, 72 << inShift); real2 v10 = load(in, 8 << inShift); real2 v296 = plus(v10, v74); real2 v292 = minus(v74, v10); real2 v42 = load(in, 40 << inShift); real2 v106 = load(in, 104 << inShift); real2 v291 = reverse(minus(v42, v106)); real2 v297 = plus(v42, v106); real2 v293 = minusplus(v291, v292); real2 v295 = minusplus(uminus(v291), v292); real2 v309 = ctimesminusplus(reverse(v295), tbl[36 + tbloffset], ctimes(v295, tbl[37 + tbloffset])); real2 v932 = minus(v297, v296); real2 v936 = plus(v296, v297); real2 v122 = load(in, 120 << inShift); real2 v58 = load(in, 56 << inShift); real2 v617 = plus(v58, v122); real2 v611 = reverse(minus(v58, v122)); real2 v26 = load(in, 24 << inShift); real2 v90 = load(in, 88 << inShift); real2 v612 = minus(v90, v26); real2 v616 = plus(v26, v90); real2 v937 = plus(v616, v617); real2 v931 = reverse(minus(v616, v617)); real2 v1091 = reverse(minus(v936, v937)); real2 v1097 = plus(v936, v937); real2 v933 = minusplus(v931, v932); real2 v935 = minusplus(uminus(v931), v932); real2 v1093 = minusplus(v1091, v1092); real2 v1095 = minusplus(uminus(v1091), v1092); real2 v1103 = ctimesminusplus(reverse(v1093), tbl[194 + tbloffset], ctimes(v1093, tbl[195 + tbloffset])); real2 v1468 = plus(v1103, v1183); real2 
v1464 = minus(v1183, v1103); real2 v1508 = plus(v1468, v1469); real2 v1504 = minus(v1469, v1468); real2 v1252 = minus(v1097, v1096); real2 v1256 = plus(v1096, v1097); real2 v1336 = plus(v1256, v1257); real2 v1332 = minus(v1257, v1256); real2 v1335 = minusplus(uminus(v1331), v1332); real2 v1333 = minusplus(v1331, v1332); real2 v1343 = ctimesminusplus(reverse(v1333), tbl[242 + tbloffset], ctimes(v1333, tbl[243 + tbloffset])); real2 v1349 = ctimesminusplus(reverse(v1335), tbl[244 + tbloffset], ctimes(v1335, tbl[245 + tbloffset])); real2 v1376 = plus(v1336, v1337); real2 v1372 = minus(v1337, v1336); real2 v1465 = minusplus(v1463, v1464); real2 v1467 = minusplus(uminus(v1463), v1464); real2 v1255 = minusplus(uminus(v1251), v1252); real2 v1253 = minusplus(v1251, v1252); real2 v1481 = ctimesminusplus(reverse(v1467), tbl[264 + tbloffset], ctimes(v1467, tbl[265 + tbloffset])); real2 v1475 = ctimesminusplus(reverse(v1465), tbl[262 + tbloffset], ctimes(v1465, tbl[263 + tbloffset])); real2 v1109 = ctimesminusplus(reverse(v1095), tbl[196 + tbloffset], ctimes(v1095, tbl[197 + tbloffset])); real2 v1542 = minus(v1189, v1109); real2 v1546 = plus(v1109, v1189); real2 v1545 = minusplus(uminus(v1541), v1542); real2 v1543 = minusplus(v1541, v1542); real2 v1553 = ctimesminusplus(reverse(v1543), tbl[274 + tbloffset], ctimes(v1543, tbl[275 + tbloffset])); real2 v1559 = ctimesminusplus(reverse(v1545), tbl[276 + tbloffset], ctimes(v1545, tbl[277 + tbloffset])); real2 v1582 = minus(v1547, v1546); real2 v1586 = plus(v1546, v1547); real2 v1269 = ctimesminusplus(reverse(v1255), tbl[228 + tbloffset], ctimes(v1255, tbl[229 + tbloffset])); real2 v1438 = minus(v1309, v1269); real2 v1442 = plus(v1269, v1309); real2 v1263 = ctimesminusplus(reverse(v1253), tbl[226 + tbloffset], ctimes(v1253, tbl[227 + tbloffset])); real2 v943 = ctimesminusplus(reverse(v933), tbl[162 + tbloffset], ctimes(v933, tbl[163 + tbloffset])); real2 v1624 = plus(v783, v943); real2 v1620 = minus(v943, v783); real2 v1623 = 
minusplus(uminus(v1619), v1620); real2 v1621 = minusplus(v1619, v1620); real2 v1700 = minus(v1625, v1624); real2 v1704 = plus(v1624, v1625); real2 v1631 = ctimesminusplus(reverse(v1621), tbl[286 + tbloffset], ctimes(v1621, tbl[287 + tbloffset])); real2 v949 = ctimesminusplus(reverse(v935), tbl[164 + tbloffset], ctimes(v935, tbl[165 + tbloffset])); real2 v1830 = minus(v949, v789); real2 v1834 = plus(v789, v949); real2 v1782 = plus(v1631, v1671); real2 v1778 = minus(v1671, v1631); real2 v1910 = minus(v1835, v1834); real2 v1914 = plus(v1834, v1835); real2 v1950 = minus(v1915, v1914); real2 v1954 = plus(v1914, v1915); real2 v1913 = minusplus(uminus(v1909), v1910); real2 v1911 = minusplus(v1909, v1910); real2 v613 = minusplus(v611, v612); real2 v615 = minusplus(uminus(v611), v612); real2 v629 = ctimesminusplus(reverse(v615), tbl[100 + tbloffset], ctimes(v615, tbl[101 + tbloffset])); real2 v1744 = plus(v1704, v1705); real2 v1740 = minus(v1705, v1704); real2 v1637 = ctimesminusplus(reverse(v1623), tbl[288 + tbloffset], ctimes(v1623, tbl[289 + tbloffset])); real2 v1927 = ctimesminusplus(reverse(v1913), tbl[340 + tbloffset], ctimes(v1913, tbl[341 + tbloffset])); real2 v2571 = plus(v309, v629); real2 v2565 = reverse(minus(v309, v629)); real2 v1833 = minusplus(uminus(v1829), v1830); real2 v1831 = minusplus(v1829, v1830); real2 v1921 = ctimesminusplus(reverse(v1911), tbl[338 + tbloffset], ctimes(v1911, tbl[339 + tbloffset])); real2 v1804 = minus(v1677, v1637); real2 v1808 = plus(v1637, v1677); real2 v1847 = ctimesminusplus(reverse(v1833), tbl[324 + tbloffset], ctimes(v1833, tbl[325 + tbloffset])); real2 v2014 = minus(v1887, v1847); real2 v2018 = plus(v1847, v1887); real2 v1841 = ctimesminusplus(reverse(v1831), tbl[322 + tbloffset], ctimes(v1831, tbl[323 + tbloffset])); real2 v1988 = minus(v1881, v1841); real2 v1992 = plus(v1841, v1881); real2 v1703 = minusplus(uminus(v1699), v1700); real2 v1701 = minusplus(v1699, v1700); real2 v1717 = ctimesminusplus(reverse(v1703), tbl[304 + 
tbloffset], ctimes(v1703, tbl[305 + tbloffset])); real2 v1711 = ctimesminusplus(reverse(v1701), tbl[302 + tbloffset], ctimes(v1701, tbl[303 + tbloffset])); real2 v2730 = plus(v2570, v2571); real2 v2726 = minus(v2571, v2570); real2 v1412 = minus(v1303, v1263); real2 v1416 = plus(v1263, v1303); real2 v63 = load(in, 61 << inShift); real2 v127 = load(in, 125 << inShift); real2 v717 = plus(v63, v127); real2 v711 = reverse(minus(v63, v127)); real2 v95 = load(in, 93 << inShift); real2 v31 = load(in, 29 << inShift); real2 v712 = minus(v95, v31); real2 v716 = plus(v31, v95); real2 v1037 = plus(v716, v717); real2 v1031 = reverse(minus(v716, v717)); real2 v79 = load(in, 77 << inShift); real2 v15 = load(in, 13 << inShift); real2 v396 = plus(v15, v79); real2 v392 = minus(v79, v15); real2 v111 = load(in, 109 << inShift); real2 v47 = load(in, 45 << inShift); real2 v397 = plus(v47, v111); real2 v391 = reverse(minus(v47, v111)); real2 v1032 = minus(v397, v396); real2 v1036 = plus(v396, v397); real2 v1033 = minusplus(v1031, v1032); real2 v1035 = minusplus(uminus(v1031), v1032); real2 v1049 = ctimesminusplus(reverse(v1035), tbl[184 + tbloffset], ctimes(v1035, tbl[185 + tbloffset])); real2 v1043 = ctimesminusplus(reverse(v1033), tbl[182 + tbloffset], ctimes(v1033, tbl[183 + tbloffset])); real2 v1197 = plus(v1036, v1037); real2 v1191 = reverse(minus(v1036, v1037)); real2 v23 = load(in, 21 << inShift); real2 v87 = load(in, 85 << inShift); real2 v556 = plus(v23, v87); real2 v552 = minus(v87, v23); real2 v119 = load(in, 117 << inShift); real2 v55 = load(in, 53 << inShift); real2 v557 = plus(v55, v119); real2 v551 = reverse(minus(v55, v119)); real2 v877 = plus(v556, v557); real2 v871 = reverse(minus(v556, v557)); real2 v7 = load(in, 5 << inShift); real2 v71 = load(in, 69 << inShift); real2 v232 = minus(v71, v7); real2 v236 = plus(v7, v71); real2 v103 = load(in, 101 << inShift); real2 v39 = load(in, 37 << inShift); real2 v237 = plus(v39, v103); real2 v231 = reverse(minus(v39, v103)); real2 
v876 = plus(v236, v237); real2 v872 = minus(v237, v236); real2 v1192 = minus(v877, v876); real2 v1196 = plus(v876, v877); real2 v1271 = reverse(minus(v1196, v1197)); real2 v1277 = plus(v1196, v1197); real2 v875 = minusplus(uminus(v871), v872); real2 v873 = minusplus(v871, v872); real2 v883 = ctimesminusplus(reverse(v873), tbl[150 + tbloffset], ctimes(v873, tbl[151 + tbloffset])); real2 v1639 = reverse(minus(v883, v1043)); real2 v1645 = plus(v883, v1043); real2 v1195 = minusplus(uminus(v1191), v1192); real2 v1193 = minusplus(v1191, v1192); real2 v1209 = ctimesminusplus(reverse(v1195), tbl[216 + tbloffset], ctimes(v1195, tbl[217 + tbloffset])); real2 v1203 = ctimesminusplus(reverse(v1193), tbl[214 + tbloffset], ctimes(v1193, tbl[215 + tbloffset])); real2 v83 = load(in, 81 << inShift); real2 v19 = load(in, 17 << inShift); real2 v476 = plus(v19, v83); real2 v472 = minus(v83, v19); real2 v51 = load(in, 49 << inShift); real2 v115 = load(in, 113 << inShift); real2 v477 = plus(v51, v115); real2 v471 = reverse(minus(v51, v115)); real2 v797 = plus(v476, v477); real2 v791 = reverse(minus(v476, v477)); real2 v3 = load(in, 1 << inShift); real2 v67 = load(in, 65 << inShift); real2 v156 = plus(v3, v67); real2 v152 = minus(v67, v3); real2 v35 = load(in, 33 << inShift); real2 v99 = load(in, 97 << inShift); real2 v157 = plus(v35, v99); real2 v151 = reverse(minus(v35, v99)); real2 v792 = minus(v157, v156); real2 v796 = plus(v156, v157); real2 v793 = minusplus(v791, v792); real2 v795 = minusplus(uminus(v791), v792); real2 v803 = ctimesminusplus(reverse(v793), tbl[134 + tbloffset], ctimes(v793, tbl[135 + tbloffset])); real2 v1112 = minus(v797, v796); real2 v1116 = plus(v796, v797); real2 v107 = load(in, 105 << inShift); real2 v43 = load(in, 41 << inShift); real2 v317 = plus(v43, v107); real2 v311 = reverse(minus(v43, v107)); real2 v75 = load(in, 73 << inShift); real2 v11 = load(in, 9 << inShift); real2 v316 = plus(v11, v75); real2 v312 = minus(v75, v11); real2 v956 = plus(v316, v317); 
real2 v952 = minus(v317, v316); real2 v59 = load(in, 57 << inShift); real2 v123 = load(in, 121 << inShift); real2 v631 = reverse(minus(v59, v123)); real2 v637 = plus(v59, v123); real2 v27 = load(in, 25 << inShift); real2 v91 = load(in, 89 << inShift); real2 v636 = plus(v27, v91); real2 v632 = minus(v91, v27); real2 v957 = plus(v636, v637); real2 v951 = reverse(minus(v636, v637)); real2 v1111 = reverse(minus(v956, v957)); real2 v1117 = plus(v956, v957); real2 v1276 = plus(v1116, v1117); real2 v1272 = minus(v1117, v1116); real2 v1275 = minusplus(uminus(v1271), v1272); real2 v1273 = minusplus(v1271, v1272); real2 v1283 = ctimesminusplus(reverse(v1273), tbl[230 + tbloffset], ctimes(v1273, tbl[231 + tbloffset])); real2 v1352 = minus(v1277, v1276); real2 v1356 = plus(v1276, v1277); real2 v1289 = ctimesminusplus(reverse(v1275), tbl[232 + tbloffset], ctimes(v1275, tbl[233 + tbloffset])); real2 v1115 = minusplus(uminus(v1111), v1112); real2 v1113 = minusplus(v1111, v1112); real2 v1123 = ctimesminusplus(reverse(v1113), tbl[198 + tbloffset], ctimes(v1113, tbl[199 + tbloffset])); real2 v1129 = ctimesminusplus(reverse(v1115), tbl[200 + tbloffset], ctimes(v1115, tbl[201 + tbloffset])); real2 v1488 = plus(v1123, v1203); real2 v1484 = minus(v1203, v1123); real2 v1566 = plus(v1129, v1209); real2 v1562 = minus(v1209, v1129); real2 v85 = load(in, 83 << inShift); real2 v21 = load(in, 19 << inShift); real2 v512 = minus(v85, v21); real2 v516 = plus(v21, v85); real2 v117 = load(in, 115 << inShift); real2 v53 = load(in, 51 << inShift); real2 v517 = plus(v53, v117); real2 v511 = reverse(minus(v53, v117)); real2 v831 = reverse(minus(v516, v517)); real2 v837 = plus(v516, v517); real2 v69 = load(in, 67 << inShift); real2 v5 = load(in, 3 << inShift); real2 v192 = minus(v69, v5); real2 v196 = plus(v5, v69); real2 v37 = load(in, 35 << inShift); real2 v101 = load(in, 99 << inShift); real2 v197 = plus(v37, v101); real2 v191 = reverse(minus(v37, v101)); real2 v832 = minus(v197, v196); real2 v836 = 
plus(v196, v197); real2 v1152 = minus(v837, v836); real2 v1156 = plus(v836, v837); real2 v61 = load(in, 59 << inShift); real2 v125 = load(in, 123 << inShift); real2 v677 = plus(v61, v125); real2 v671 = reverse(minus(v61, v125)); real2 v29 = load(in, 27 << inShift); real2 v93 = load(in, 91 << inShift); real2 v672 = minus(v93, v29); real2 v676 = plus(v29, v93); real2 v997 = plus(v676, v677); real2 v991 = reverse(minus(v676, v677)); real2 v109 = load(in, 107 << inShift); real2 v45 = load(in, 43 << inShift); real2 v357 = plus(v45, v109); real2 v351 = reverse(minus(v45, v109)); real2 v77 = load(in, 75 << inShift); real2 v13 = load(in, 11 << inShift); real2 v352 = minus(v77, v13); real2 v356 = plus(v13, v77); real2 v992 = minus(v357, v356); real2 v996 = plus(v356, v357); real2 v1157 = plus(v996, v997); real2 v1151 = reverse(minus(v996, v997)); real2 v1155 = minusplus(uminus(v1151), v1152); real2 v1153 = minusplus(v1151, v1152); real2 v1163 = ctimesminusplus(reverse(v1153), tbl[206 + tbloffset], ctimes(v1153, tbl[207 + tbloffset])); real2 v1316 = plus(v1156, v1157); real2 v1312 = minus(v1157, v1156); real2 v41 = load(in, 39 << inShift); real2 v105 = load(in, 103 << inShift); real2 v277 = plus(v41, v105); real2 v271 = reverse(minus(v41, v105)); real2 v9 = load(in, 7 << inShift); real2 v73 = load(in, 71 << inShift); real2 v276 = plus(v9, v73); real2 v272 = minus(v73, v9); real2 v916 = plus(v276, v277); real2 v912 = minus(v277, v276); real2 v89 = load(in, 87 << inShift); real2 v25 = load(in, 23 << inShift); real2 v592 = minus(v89, v25); real2 v596 = plus(v25, v89); real2 v57 = load(in, 55 << inShift); real2 v121 = load(in, 119 << inShift); real2 v591 = reverse(minus(v57, v121)); real2 v597 = plus(v57, v121); real2 v911 = reverse(minus(v596, v597)); real2 v917 = plus(v596, v597); real2 v1236 = plus(v916, v917); real2 v1232 = minus(v917, v916); real2 v81 = load(in, 79 << inShift); real2 v17 = load(in, 15 << inShift); real2 v432 = minus(v81, v17); real2 v436 = plus(v17, v81); 
real2 v113 = load(in, 111 << inShift); real2 v49 = load(in, 47 << inShift); real2 v437 = plus(v49, v113); real2 v431 = reverse(minus(v49, v113)); real2 v1072 = minus(v437, v436); real2 v1076 = plus(v436, v437); real2 v65 = load(in, 63 << inShift); real2 v129 = load(in, 127 << inShift); real2 v757 = plus(v65, v129); real2 v751 = reverse(minus(v65, v129)); real2 v97 = load(in, 95 << inShift); real2 v33 = load(in, 31 << inShift); real2 v752 = minus(v97, v33); real2 v756 = plus(v33, v97); real2 v1077 = plus(v756, v757); real2 v1071 = reverse(minus(v756, v757)); real2 v1231 = reverse(minus(v1076, v1077)); real2 v1237 = plus(v1076, v1077); real2 v1317 = plus(v1236, v1237); real2 v1311 = reverse(minus(v1236, v1237)); real2 v1351 = reverse(minus(v1316, v1317)); real2 v1357 = plus(v1316, v1317); real2 v1371 = reverse(minus(v1356, v1357)); real2 v1377 = plus(v1356, v1357); store(out, 0 << outShift, plus(v1376, v1377)); real2 v1390 = minus(v1376, v1377); store(out, 64 << outShift, ctimesminusplus(v1390, tbl[0 + tbloffset], ctimes(reverse(v1390), tbl[1 + tbloffset]))); real2 v1353 = minusplus(v1351, v1352); real2 v1355 = minusplus(uminus(v1351), v1352); real2 v1369 = ctimesminusplus(reverse(v1355), tbl[248 + tbloffset], ctimes(v1355, tbl[249 + tbloffset])); store(out, 48 << outShift, plus(v1349, v1369)); real2 v1404 = minus(v1349, v1369); store(out, 112 << outShift, ctimesminusplus(v1404, tbl[0 + tbloffset], ctimes(reverse(v1404), tbl[1 + tbloffset]))); real2 v1363 = ctimesminusplus(reverse(v1353), tbl[246 + tbloffset], ctimes(v1353, tbl[247 + tbloffset])); store(out, 16 << outShift, plus(v1343, v1363)); real2 v1398 = minus(v1343, v1363); store(out, 80 << outShift, ctimesminusplus(v1398, tbl[0 + tbloffset], ctimes(reverse(v1398), tbl[1 + tbloffset]))); real2 v1373 = minusplus(v1371, v1372); real2 v1375 = minusplus(uminus(v1371), v1372); store(out, 96 << outShift, ctimesminusplus(reverse(v1375), tbl[252 + tbloffset], ctimes(v1375, tbl[253 + tbloffset]))); store(out, 32 << 
outShift, ctimesminusplus(reverse(v1373), tbl[250 + tbloffset], ctimes(v1373, tbl[251 + tbloffset]))); real2 v1313 = minusplus(v1311, v1312); real2 v1315 = minusplus(uminus(v1311), v1312); real2 v1323 = ctimesminusplus(reverse(v1313), tbl[238 + tbloffset], ctimes(v1313, tbl[239 + tbloffset])); real2 v1417 = plus(v1283, v1323); real2 v1411 = reverse(minus(v1283, v1323)); store(out, 8 << outShift, plus(v1416, v1417)); real2 v1430 = minus(v1416, v1417); store(out, 72 << outShift, ctimesminusplus(v1430, tbl[0 + tbloffset], ctimes(reverse(v1430), tbl[1 + tbloffset]))); real2 v1413 = minusplus(v1411, v1412); real2 v1415 = minusplus(uminus(v1411), v1412); store(out, 104 << outShift, ctimesminusplus(reverse(v1415), tbl[256 + tbloffset], ctimes(v1415, tbl[257 + tbloffset]))); store(out, 40 << outShift, ctimesminusplus(reverse(v1413), tbl[254 + tbloffset], ctimes(v1413, tbl[255 + tbloffset]))); real2 v1329 = ctimesminusplus(reverse(v1315), tbl[240 + tbloffset], ctimes(v1315, tbl[241 + tbloffset])); real2 v1443 = plus(v1289, v1329); real2 v1437 = reverse(minus(v1289, v1329)); store(out, 24 << outShift, plus(v1442, v1443)); real2 v1456 = minus(v1442, v1443); store(out, 88 << outShift, ctimesminusplus(v1456, tbl[0 + tbloffset], ctimes(reverse(v1456), tbl[1 + tbloffset]))); real2 v1441 = minusplus(uminus(v1437), v1438); real2 v1439 = minusplus(v1437, v1438); store(out, 120 << outShift, ctimesminusplus(reverse(v1441), tbl[260 + tbloffset], ctimes(v1441, tbl[261 + tbloffset]))); store(out, 56 << outShift, ctimesminusplus(reverse(v1439), tbl[258 + tbloffset], ctimes(v1439, tbl[259 + tbloffset]))); real2 v1235 = minusplus(uminus(v1231), v1232); real2 v1233 = minusplus(v1231, v1232); real2 v1243 = ctimesminusplus(reverse(v1233), tbl[222 + tbloffset], ctimes(v1233, tbl[223 + tbloffset])); real2 v1489 = plus(v1163, v1243); real2 v1483 = reverse(minus(v1163, v1243)); real2 v1509 = plus(v1488, v1489); real2 v1503 = reverse(minus(v1488, v1489)); store(out, 4 << outShift, plus(v1508, 
v1509)); real2 v1522 = minus(v1508, v1509); store(out, 68 << outShift, ctimesminusplus(v1522, tbl[0 + tbloffset], ctimes(reverse(v1522), tbl[1 + tbloffset]))); real2 v1507 = minusplus(uminus(v1503), v1504); real2 v1505 = minusplus(v1503, v1504); store(out, 36 << outShift, ctimesminusplus(reverse(v1505), tbl[270 + tbloffset], ctimes(v1505, tbl[271 + tbloffset]))); store(out, 100 << outShift, ctimesminusplus(reverse(v1507), tbl[272 + tbloffset], ctimes(v1507, tbl[273 + tbloffset]))); real2 v1485 = minusplus(v1483, v1484); real2 v1487 = minusplus(uminus(v1483), v1484); real2 v1501 = ctimesminusplus(reverse(v1487), tbl[268 + tbloffset], ctimes(v1487, tbl[269 + tbloffset])); store(out, 52 << outShift, plus(v1481, v1501)); real2 v1534 = minus(v1481, v1501); store(out, 116 << outShift, ctimesminusplus(v1534, tbl[0 + tbloffset], ctimes(reverse(v1534), tbl[1 + tbloffset]))); real2 v1495 = ctimesminusplus(reverse(v1485), tbl[266 + tbloffset], ctimes(v1485, tbl[267 + tbloffset])); store(out, 20 << outShift, plus(v1475, v1495)); real2 v1528 = minus(v1475, v1495); store(out, 84 << outShift, ctimesminusplus(v1528, tbl[0 + tbloffset], ctimes(reverse(v1528), tbl[1 + tbloffset]))); real2 v1249 = ctimesminusplus(reverse(v1235), tbl[224 + tbloffset], ctimes(v1235, tbl[225 + tbloffset])); real2 v1169 = ctimesminusplus(reverse(v1155), tbl[208 + tbloffset], ctimes(v1155, tbl[209 + tbloffset])); real2 v1567 = plus(v1169, v1249); real2 v1561 = reverse(minus(v1169, v1249)); real2 v1581 = reverse(minus(v1566, v1567)); real2 v1587 = plus(v1566, v1567); store(out, 12 << outShift, plus(v1586, v1587)); real2 v1600 = minus(v1586, v1587); store(out, 76 << outShift, ctimesminusplus(v1600, tbl[0 + tbloffset], ctimes(reverse(v1600), tbl[1 + tbloffset]))); real2 v1583 = minusplus(v1581, v1582); store(out, 44 << outShift, ctimesminusplus(reverse(v1583), tbl[282 + tbloffset], ctimes(v1583, tbl[283 + tbloffset]))); real2 v1585 = minusplus(uminus(v1581), v1582); store(out, 108 << outShift, 
ctimesminusplus(reverse(v1585), tbl[284 + tbloffset], ctimes(v1585, tbl[285 + tbloffset]))); real2 v1565 = minusplus(uminus(v1561), v1562); real2 v1563 = minusplus(v1561, v1562); real2 v1579 = ctimesminusplus(reverse(v1565), tbl[280 + tbloffset], ctimes(v1565, tbl[281 + tbloffset])); store(out, 60 << outShift, plus(v1559, v1579)); real2 v1612 = minus(v1559, v1579); store(out, 124 << outShift, ctimesminusplus(v1612, tbl[0 + tbloffset], ctimes(reverse(v1612), tbl[1 + tbloffset]))); real2 v1573 = ctimesminusplus(reverse(v1563), tbl[278 + tbloffset], ctimes(v1563, tbl[279 + tbloffset])); store(out, 28 << outShift, plus(v1553, v1573)); real2 v1606 = minus(v1553, v1573); store(out, 92 << outShift, ctimesminusplus(v1606, tbl[0 + tbloffset], ctimes(reverse(v1606), tbl[1 + tbloffset]))); real2 v833 = minusplus(v831, v832); real2 v835 = minusplus(uminus(v831), v832); real2 v955 = minusplus(uminus(v951), v952); real2 v953 = minusplus(v951, v952); real2 v963 = ctimesminusplus(reverse(v953), tbl[166 + tbloffset], ctimes(v953, tbl[167 + tbloffset])); real2 v995 = minusplus(uminus(v991), v992); real2 v993 = minusplus(v991, v992); real2 v1003 = ctimesminusplus(reverse(v993), tbl[174 + tbloffset], ctimes(v993, tbl[175 + tbloffset])); real2 v843 = ctimesminusplus(reverse(v833), tbl[142 + tbloffset], ctimes(v833, tbl[143 + tbloffset])); real2 v1640 = minus(v963, v803); real2 v1644 = plus(v803, v963); real2 v1680 = minus(v1003, v843); real2 v1684 = plus(v843, v1003); real2 v1641 = minusplus(v1639, v1640); real2 v1643 = minusplus(uminus(v1639), v1640); real2 v1657 = ctimesminusplus(reverse(v1643), tbl[292 + tbloffset], ctimes(v1643, tbl[293 + tbloffset])); real2 v913 = minusplus(v911, v912); real2 v915 = minusplus(uminus(v911), v912); real2 v1073 = minusplus(v1071, v1072); real2 v1075 = minusplus(uminus(v1071), v1072); real2 v923 = ctimesminusplus(reverse(v913), tbl[158 + tbloffset], ctimes(v913, tbl[159 + tbloffset])); real2 v1083 = ctimesminusplus(reverse(v1073), tbl[190 + 
tbloffset], ctimes(v1073, tbl[191 + tbloffset])); real2 v1685 = plus(v923, v1083); real2 v1679 = reverse(minus(v923, v1083)); real2 v1681 = minusplus(v1679, v1680); real2 v1683 = minusplus(uminus(v1679), v1680); real2 v1697 = ctimesminusplus(reverse(v1683), tbl[300 + tbloffset], ctimes(v1683, tbl[301 + tbloffset])); real2 v1809 = plus(v1657, v1697); real2 v1803 = reverse(minus(v1657, v1697)); store(out, 26 << outShift, plus(v1808, v1809)); real2 v1822 = minus(v1808, v1809); store(out, 90 << outShift, ctimesminusplus(v1822, tbl[0 + tbloffset], ctimes(reverse(v1822), tbl[1 + tbloffset]))); real2 v1807 = minusplus(uminus(v1803), v1804); real2 v1805 = minusplus(v1803, v1804); store(out, 58 << outShift, ctimesminusplus(reverse(v1805), tbl[318 + tbloffset], ctimes(v1805, tbl[319 + tbloffset]))); store(out, 122 << outShift, ctimesminusplus(reverse(v1807), tbl[320 + tbloffset], ctimes(v1807, tbl[321 + tbloffset]))); real2 v1651 = ctimesminusplus(reverse(v1641), tbl[290 + tbloffset], ctimes(v1641, tbl[291 + tbloffset])); real2 v1691 = ctimesminusplus(reverse(v1681), tbl[298 + tbloffset], ctimes(v1681, tbl[299 + tbloffset])); real2 v1783 = plus(v1651, v1691); real2 v1777 = reverse(minus(v1651, v1691)); real2 v1779 = minusplus(v1777, v1778); real2 v1781 = minusplus(uminus(v1777), v1778); store(out, 106 << outShift, ctimesminusplus(reverse(v1781), tbl[316 + tbloffset], ctimes(v1781, tbl[317 + tbloffset]))); store(out, 42 << outShift, ctimesminusplus(reverse(v1779), tbl[314 + tbloffset], ctimes(v1779, tbl[315 + tbloffset]))); store(out, 10 << outShift, plus(v1782, v1783)); real2 v1796 = minus(v1782, v1783); store(out, 74 << outShift, ctimesminusplus(v1796, tbl[0 + tbloffset], ctimes(reverse(v1796), tbl[1 + tbloffset]))); real2 v1720 = minus(v1645, v1644); real2 v1724 = plus(v1644, v1645); real2 v1719 = reverse(minus(v1684, v1685)); real2 v1725 = plus(v1684, v1685); real2 v1745 = plus(v1724, v1725); real2 v1739 = reverse(minus(v1724, v1725)); store(out, 2 << outShift, 
plus(v1744, v1745)); real2 v1758 = minus(v1744, v1745); store(out, 66 << outShift, ctimesminusplus(v1758, tbl[0 + tbloffset], ctimes(reverse(v1758), tbl[1 + tbloffset]))); real2 v1741 = minusplus(v1739, v1740); real2 v1743 = minusplus(uminus(v1739), v1740); store(out, 98 << outShift, ctimesminusplus(reverse(v1743), tbl[312 + tbloffset], ctimes(v1743, tbl[313 + tbloffset]))); store(out, 34 << outShift, ctimesminusplus(reverse(v1741), tbl[310 + tbloffset], ctimes(v1741, tbl[311 + tbloffset]))); real2 v1723 = minusplus(uminus(v1719), v1720); real2 v1721 = minusplus(v1719, v1720); real2 v1737 = ctimesminusplus(reverse(v1723), tbl[308 + tbloffset], ctimes(v1723, tbl[309 + tbloffset])); store(out, 50 << outShift, plus(v1717, v1737)); real2 v1770 = minus(v1717, v1737); store(out, 114 << outShift, ctimesminusplus(v1770, tbl[0 + tbloffset], ctimes(reverse(v1770), tbl[1 + tbloffset]))); real2 v1731 = ctimesminusplus(reverse(v1721), tbl[306 + tbloffset], ctimes(v1721, tbl[307 + tbloffset])); store(out, 18 << outShift, plus(v1711, v1731)); real2 v1764 = minus(v1711, v1731); store(out, 82 << outShift, ctimesminusplus(v1764, tbl[0 + tbloffset], ctimes(reverse(v1764), tbl[1 + tbloffset]))); real2 v809 = ctimesminusplus(reverse(v795), tbl[136 + tbloffset], ctimes(v795, tbl[137 + tbloffset])); real2 v969 = ctimesminusplus(reverse(v955), tbl[168 + tbloffset], ctimes(v955, tbl[169 + tbloffset])); real2 v1850 = minus(v969, v809); real2 v1854 = plus(v809, v969); real2 v849 = ctimesminusplus(reverse(v835), tbl[144 + tbloffset], ctimes(v835, tbl[145 + tbloffset])); real2 v929 = ctimesminusplus(reverse(v915), tbl[160 + tbloffset], ctimes(v915, tbl[161 + tbloffset])); real2 v889 = ctimesminusplus(reverse(v875), tbl[152 + tbloffset], ctimes(v875, tbl[153 + tbloffset])); real2 v1089 = ctimesminusplus(reverse(v1075), tbl[192 + tbloffset], ctimes(v1075, tbl[193 + tbloffset])); real2 v1009 = ctimesminusplus(reverse(v995), tbl[176 + tbloffset], ctimes(v995, tbl[177 + tbloffset])); real2 v1890 = 
minus(v1009, v849); real2 v1894 = plus(v849, v1009); real2 v1849 = reverse(minus(v889, v1049)); real2 v1855 = plus(v889, v1049); real2 v1930 = minus(v1855, v1854); real2 v1934 = plus(v1854, v1855); real2 v1895 = plus(v929, v1089); real2 v1889 = reverse(minus(v929, v1089)); real2 v1929 = reverse(minus(v1894, v1895)); real2 v1935 = plus(v1894, v1895); real2 v1955 = plus(v1934, v1935); real2 v1949 = reverse(minus(v1934, v1935)); store(out, 6 << outShift, plus(v1954, v1955)); real2 v1968 = minus(v1954, v1955); store(out, 70 << outShift, ctimesminusplus(v1968, tbl[0 + tbloffset], ctimes(reverse(v1968), tbl[1 + tbloffset]))); real2 v1951 = minusplus(v1949, v1950); store(out, 38 << outShift, ctimesminusplus(reverse(v1951), tbl[346 + tbloffset], ctimes(v1951, tbl[347 + tbloffset]))); real2 v1953 = minusplus(uminus(v1949), v1950); store(out, 102 << outShift, ctimesminusplus(reverse(v1953), tbl[348 + tbloffset], ctimes(v1953, tbl[349 + tbloffset]))); real2 v1931 = minusplus(v1929, v1930); real2 v1933 = minusplus(uminus(v1929), v1930); real2 v1947 = ctimesminusplus(reverse(v1933), tbl[344 + tbloffset], ctimes(v1933, tbl[345 + tbloffset])); store(out, 54 << outShift, plus(v1927, v1947)); real2 v1980 = minus(v1927, v1947); store(out, 118 << outShift, ctimesminusplus(v1980, tbl[0 + tbloffset], ctimes(reverse(v1980), tbl[1 + tbloffset]))); real2 v1941 = ctimesminusplus(reverse(v1931), tbl[342 + tbloffset], ctimes(v1931, tbl[343 + tbloffset])); store(out, 22 << outShift, plus(v1921, v1941)); real2 v1974 = minus(v1921, v1941); store(out, 86 << outShift, ctimesminusplus(v1974, tbl[0 + tbloffset], ctimes(reverse(v1974), tbl[1 + tbloffset]))); real2 v1851 = minusplus(v1849, v1850); real2 v1853 = minusplus(uminus(v1849), v1850); real2 v1867 = ctimesminusplus(reverse(v1853), tbl[328 + tbloffset], ctimes(v1853, tbl[329 + tbloffset])); real2 v1891 = minusplus(v1889, v1890); real2 v1893 = minusplus(uminus(v1889), v1890); real2 v1907 = ctimesminusplus(reverse(v1893), tbl[336 + tbloffset], 
ctimes(v1893, tbl[337 + tbloffset])); real2 v2019 = plus(v1867, v1907); real2 v2013 = reverse(minus(v1867, v1907)); store(out, 30 << outShift, plus(v2018, v2019)); real2 v2032 = minus(v2018, v2019); store(out, 94 << outShift, ctimesminusplus(v2032, tbl[0 + tbloffset], ctimes(reverse(v2032), tbl[1 + tbloffset]))); real2 v2017 = minusplus(uminus(v2013), v2014); store(out, 126 << outShift, ctimesminusplus(reverse(v2017), tbl[356 + tbloffset], ctimes(v2017, tbl[357 + tbloffset]))); real2 v2015 = minusplus(v2013, v2014); store(out, 62 << outShift, ctimesminusplus(reverse(v2015), tbl[354 + tbloffset], ctimes(v2015, tbl[355 + tbloffset]))); real2 v1861 = ctimesminusplus(reverse(v1851), tbl[326 + tbloffset], ctimes(v1851, tbl[327 + tbloffset])); real2 v1901 = ctimesminusplus(reverse(v1891), tbl[334 + tbloffset], ctimes(v1891, tbl[335 + tbloffset])); real2 v1993 = plus(v1861, v1901); real2 v1987 = reverse(minus(v1861, v1901)); store(out, 14 << outShift, plus(v1992, v1993)); real2 v2006 = minus(v1992, v1993); store(out, 78 << outShift, ctimesminusplus(v2006, tbl[0 + tbloffset], ctimes(reverse(v2006), tbl[1 + tbloffset]))); real2 v1991 = minusplus(uminus(v1987), v1988); store(out, 110 << outShift, ctimesminusplus(reverse(v1991), tbl[352 + tbloffset], ctimes(v1991, tbl[353 + tbloffset]))); real2 v1989 = minusplus(v1987, v1988); store(out, 46 << outShift, ctimesminusplus(reverse(v1989), tbl[350 + tbloffset], ctimes(v1989, tbl[351 + tbloffset]))); real2 v593 = minusplus(v591, v592); real2 v595 = minusplus(uminus(v591), v592); real2 v473 = minusplus(v471, v472); real2 v475 = minusplus(uminus(v471), v472); real2 v555 = minusplus(uminus(v551), v552); real2 v553 = minusplus(v551, v552); real2 v609 = ctimesminusplus(reverse(v595), tbl[96 + tbloffset], ctimes(v595, tbl[97 + tbloffset])); real2 v195 = minusplus(uminus(v191), v192); real2 v193 = minusplus(v191, v192); real2 v275 = minusplus(uminus(v271), v272); real2 v273 = minusplus(v271, v272); real2 v673 = minusplus(v671, v672); 
real2 v675 = minusplus(uminus(v671), v672); real2 v689 = ctimesminusplus(reverse(v675), tbl[112 + tbloffset], ctimes(v675, tbl[113 + tbloffset])); real2 v209 = ctimesminusplus(reverse(v195), tbl[16 + tbloffset], ctimes(v195, tbl[17 + tbloffset])); real2 v289 = ctimesminusplus(reverse(v275), tbl[32 + tbloffset], ctimes(v275, tbl[33 + tbloffset])); real2 v755 = minusplus(uminus(v751), v752); real2 v753 = minusplus(v751, v752); real2 v435 = minusplus(uminus(v431), v432); real2 v433 = minusplus(v431, v432); real2 v513 = minusplus(v511, v512); real2 v515 = minusplus(uminus(v511), v512); real2 v529 = ctimesminusplus(reverse(v515), tbl[80 + tbloffset], ctimes(v515, tbl[81 + tbloffset])); real2 v353 = minusplus(v351, v352); real2 v355 = minusplus(uminus(v351), v352); real2 v369 = ctimesminusplus(reverse(v355), tbl[48 + tbloffset], ctimes(v355, tbl[49 + tbloffset])); real2 v2631 = plus(v369, v689); real2 v2625 = reverse(minus(v369, v689)); real2 v449 = ctimesminusplus(reverse(v435), tbl[64 + tbloffset], ctimes(v435, tbl[65 + tbloffset])); real2 v2710 = plus(v289, v609); real2 v2706 = minus(v609, v289); real2 v2630 = plus(v209, v529); real2 v2626 = minus(v529, v209); real2 v2790 = plus(v2630, v2631); real2 v2786 = minus(v2631, v2630); real2 v713 = minusplus(v711, v712); real2 v715 = minusplus(uminus(v711), v712); real2 v769 = ctimesminusplus(reverse(v755), tbl[128 + tbloffset], ctimes(v755, tbl[129 + tbloffset])); real2 v2705 = reverse(minus(v449, v769)); real2 v2711 = plus(v449, v769); real2 v313 = minusplus(v311, v312); real2 v315 = minusplus(uminus(v311), v312); real2 v393 = minusplus(v391, v392); real2 v395 = minusplus(uminus(v391), v392); real2 v409 = ctimesminusplus(reverse(v395), tbl[56 + tbloffset], ctimes(v395, tbl[57 + tbloffset])); real2 v729 = ctimesminusplus(reverse(v715), tbl[120 + tbloffset], ctimes(v715, tbl[121 + tbloffset])); real2 v329 = ctimesminusplus(reverse(v315), tbl[40 + tbloffset], ctimes(v315, tbl[41 + tbloffset])); real2 v489 = 
ctimesminusplus(reverse(v475), tbl[72 + tbloffset], ctimes(v475, tbl[73 + tbloffset])); real2 v153 = minusplus(v151, v152); real2 v155 = minusplus(uminus(v151), v152); real2 v169 = ctimesminusplus(reverse(v155), tbl[8 + tbloffset], ctimes(v155, tbl[9 + tbloffset])); real2 v2586 = minus(v489, v169); real2 v2590 = plus(v169, v489); real2 v233 = minusplus(v231, v232); real2 v235 = minusplus(uminus(v231), v232); real2 v633 = minusplus(v631, v632); real2 v635 = minusplus(uminus(v631), v632); real2 v649 = ctimesminusplus(reverse(v635), tbl[104 + tbloffset], ctimes(v635, tbl[105 + tbloffset])); real2 v249 = ctimesminusplus(reverse(v235), tbl[24 + tbloffset], ctimes(v235, tbl[25 + tbloffset])); real2 v569 = ctimesminusplus(reverse(v555), tbl[88 + tbloffset], ctimes(v555, tbl[89 + tbloffset])); real2 v2670 = plus(v249, v569); real2 v2666 = minus(v569, v249); real2 v2785 = reverse(minus(v2710, v2711)); real2 v2791 = plus(v2710, v2711); real2 v2825 = reverse(minus(v2790, v2791)); real2 v2831 = plus(v2790, v2791); real2 v2671 = plus(v409, v729); real2 v2665 = reverse(minus(v409, v729)); real2 v2745 = reverse(minus(v2670, v2671)); real2 v2751 = plus(v2670, v2671); real2 v2806 = minus(v2731, v2730); real2 v2810 = plus(v2730, v2731); real2 v2846 = minus(v2811, v2810); real2 v2850 = plus(v2810, v2811); real2 v2591 = plus(v329, v649); real2 v2585 = reverse(minus(v329, v649)); real2 v2750 = plus(v2590, v2591); real2 v2746 = minus(v2591, v2590); real2 v2830 = plus(v2750, v2751); real2 v2826 = minus(v2751, v2750); real2 v2845 = reverse(minus(v2830, v2831)); real2 v2851 = plus(v2830, v2831); store(out, 3 << outShift, plus(v2850, v2851)); real2 v2864 = minus(v2850, v2851); store(out, 67 << outShift, ctimesminusplus(v2864, tbl[0 + tbloffset], ctimes(reverse(v2864), tbl[1 + tbloffset]))); real2 v2849 = minusplus(uminus(v2845), v2846); real2 v2847 = minusplus(v2845, v2846); store(out, 35 << outShift, ctimesminusplus(reverse(v2847), tbl[506 + tbloffset], ctimes(v2847, tbl[507 + 
tbloffset]))); store(out, 99 << outShift, ctimesminusplus(reverse(v2849), tbl[508 + tbloffset], ctimes(v2849, tbl[509 + tbloffset]))); real2 v2827 = minusplus(v2825, v2826); real2 v2829 = minusplus(uminus(v2825), v2826); real2 v2837 = ctimesminusplus(reverse(v2827), tbl[502 + tbloffset], ctimes(v2827, tbl[503 + tbloffset])); real2 v2809 = minusplus(uminus(v2805), v2806); real2 v2807 = minusplus(v2805, v2806); real2 v2817 = ctimesminusplus(reverse(v2807), tbl[498 + tbloffset], ctimes(v2807, tbl[499 + tbloffset])); store(out, 19 << outShift, plus(v2817, v2837)); real2 v2870 = minus(v2817, v2837); store(out, 83 << outShift, ctimesminusplus(v2870, tbl[0 + tbloffset], ctimes(reverse(v2870), tbl[1 + tbloffset]))); real2 v2823 = ctimesminusplus(reverse(v2809), tbl[500 + tbloffset], ctimes(v2809, tbl[501 + tbloffset])); real2 v2843 = ctimesminusplus(reverse(v2829), tbl[504 + tbloffset], ctimes(v2829, tbl[505 + tbloffset])); store(out, 51 << outShift, plus(v2823, v2843)); real2 v2876 = minus(v2823, v2843); store(out, 115 << outShift, ctimesminusplus(v2876, tbl[0 + tbloffset], ctimes(reverse(v2876), tbl[1 + tbloffset]))); real2 v2787 = minusplus(v2785, v2786); real2 v2789 = minusplus(uminus(v2785), v2786); real2 v2803 = ctimesminusplus(reverse(v2789), tbl[496 + tbloffset], ctimes(v2789, tbl[497 + tbloffset])); real2 v2727 = minusplus(v2725, v2726); real2 v2729 = minusplus(uminus(v2725), v2726); real2 v2743 = ctimesminusplus(reverse(v2729), tbl[484 + tbloffset], ctimes(v2729, tbl[485 + tbloffset])); real2 v2914 = plus(v2743, v2783); real2 v2910 = minus(v2783, v2743); real2 v2749 = minusplus(uminus(v2745), v2746); real2 v2747 = minusplus(v2745, v2746); real2 v2763 = ctimesminusplus(reverse(v2749), tbl[488 + tbloffset], ctimes(v2749, tbl[489 + tbloffset])); real2 v2909 = reverse(minus(v2763, v2803)); real2 v2915 = plus(v2763, v2803); store(out, 27 << outShift, plus(v2914, v2915)); real2 v2928 = minus(v2914, v2915); store(out, 91 << outShift, ctimesminusplus(v2928, tbl[0 + 
tbloffset], ctimes(reverse(v2928), tbl[1 + tbloffset]))); real2 v2913 = minusplus(uminus(v2909), v2910); store(out, 123 << outShift, ctimesminusplus(reverse(v2913), tbl[516 + tbloffset], ctimes(v2913, tbl[517 + tbloffset]))); real2 v2911 = minusplus(v2909, v2910); store(out, 59 << outShift, ctimesminusplus(reverse(v2911), tbl[514 + tbloffset], ctimes(v2911, tbl[515 + tbloffset]))); real2 v2737 = ctimesminusplus(reverse(v2727), tbl[482 + tbloffset], ctimes(v2727, tbl[483 + tbloffset])); real2 v2888 = plus(v2737, v2777); real2 v2884 = minus(v2777, v2737); real2 v2797 = ctimesminusplus(reverse(v2787), tbl[494 + tbloffset], ctimes(v2787, tbl[495 + tbloffset])); real2 v2757 = ctimesminusplus(reverse(v2747), tbl[486 + tbloffset], ctimes(v2747, tbl[487 + tbloffset])); real2 v2889 = plus(v2757, v2797); real2 v2883 = reverse(minus(v2757, v2797)); store(out, 11 << outShift, plus(v2888, v2889)); real2 v2902 = minus(v2888, v2889); store(out, 75 << outShift, ctimesminusplus(v2902, tbl[0 + tbloffset], ctimes(reverse(v2902), tbl[1 + tbloffset]))); real2 v2887 = minusplus(uminus(v2883), v2884); store(out, 107 << outShift, ctimesminusplus(reverse(v2887), tbl[512 + tbloffset], ctimes(v2887, tbl[513 + tbloffset]))); real2 v2885 = minusplus(v2883, v2884); store(out, 43 << outShift, ctimesminusplus(reverse(v2885), tbl[510 + tbloffset], ctimes(v2885, tbl[511 + tbloffset]))); real2 v2669 = minusplus(uminus(v2665), v2666); real2 v2667 = minusplus(v2665, v2666); real2 v2707 = minusplus(v2705, v2706); real2 v2709 = minusplus(uminus(v2705), v2706); real2 v2717 = ctimesminusplus(reverse(v2707), tbl[478 + tbloffset], ctimes(v2707, tbl[479 + tbloffset])); real2 v2627 = minusplus(v2625, v2626); real2 v2629 = minusplus(uminus(v2625), v2626); real2 v2637 = ctimesminusplus(reverse(v2627), tbl[462 + tbloffset], ctimes(v2627, tbl[463 + tbloffset])); real2 v2961 = plus(v2637, v2717); real2 v2955 = reverse(minus(v2637, v2717)); real2 v2649 = minusplus(uminus(v2645), v2646); real2 v2647 = 
minusplus(v2645, v2646); real2 v2569 = minusplus(uminus(v2565), v2566); real2 v2567 = minusplus(v2565, v2566); real2 v2577 = ctimesminusplus(reverse(v2567), tbl[450 + tbloffset], ctimes(v2567, tbl[451 + tbloffset])); real2 v2657 = ctimesminusplus(reverse(v2647), tbl[466 + tbloffset], ctimes(v2647, tbl[467 + tbloffset])); real2 v2936 = minus(v2657, v2577); real2 v2940 = plus(v2577, v2657); real2 v2976 = minus(v2941, v2940); real2 v2980 = plus(v2940, v2941); real2 v2677 = ctimesminusplus(reverse(v2667), tbl[470 + tbloffset], ctimes(v2667, tbl[471 + tbloffset])); real2 v2587 = minusplus(v2585, v2586); real2 v2589 = minusplus(uminus(v2585), v2586); real2 v2597 = ctimesminusplus(reverse(v2587), tbl[454 + tbloffset], ctimes(v2587, tbl[455 + tbloffset])); real2 v2956 = minus(v2677, v2597); real2 v2960 = plus(v2597, v2677); real2 v2975 = reverse(minus(v2960, v2961)); real2 v2981 = plus(v2960, v2961); store(out, 7 << outShift, plus(v2980, v2981)); real2 v2994 = minus(v2980, v2981); store(out, 71 << outShift, ctimesminusplus(v2994, tbl[0 + tbloffset], ctimes(reverse(v2994), tbl[1 + tbloffset]))); real2 v2979 = minusplus(uminus(v2975), v2976); store(out, 103 << outShift, ctimesminusplus(reverse(v2979), tbl[528 + tbloffset], ctimes(v2979, tbl[529 + tbloffset]))); real2 v2977 = minusplus(v2975, v2976); store(out, 39 << outShift, ctimesminusplus(reverse(v2977), tbl[526 + tbloffset], ctimes(v2977, tbl[527 + tbloffset]))); real2 v2939 = minusplus(uminus(v2935), v2936); real2 v2937 = minusplus(v2935, v2936); real2 v2953 = ctimesminusplus(reverse(v2939), tbl[520 + tbloffset], ctimes(v2939, tbl[521 + tbloffset])); real2 v2957 = minusplus(v2955, v2956); real2 v2959 = minusplus(uminus(v2955), v2956); real2 v2973 = ctimesminusplus(reverse(v2959), tbl[524 + tbloffset], ctimes(v2959, tbl[525 + tbloffset])); store(out, 55 << outShift, plus(v2953, v2973)); real2 v3006 = minus(v2953, v2973); store(out, 119 << outShift, ctimesminusplus(v3006, tbl[0 + tbloffset], ctimes(reverse(v3006), tbl[1 + 
tbloffset]))); real2 v2947 = ctimesminusplus(reverse(v2937), tbl[518 + tbloffset], ctimes(v2937, tbl[519 + tbloffset])); real2 v2967 = ctimesminusplus(reverse(v2957), tbl[522 + tbloffset], ctimes(v2957, tbl[523 + tbloffset])); store(out, 23 << outShift, plus(v2947, v2967)); real2 v3000 = minus(v2947, v2967); store(out, 87 << outShift, ctimesminusplus(v3000, tbl[0 + tbloffset], ctimes(reverse(v3000), tbl[1 + tbloffset]))); real2 v2663 = ctimesminusplus(reverse(v2649), tbl[468 + tbloffset], ctimes(v2649, tbl[469 + tbloffset])); real2 v2583 = ctimesminusplus(reverse(v2569), tbl[452 + tbloffset], ctimes(v2569, tbl[453 + tbloffset])); real2 v3014 = minus(v2663, v2583); real2 v3018 = plus(v2583, v2663); real2 v3015 = minusplus(v3013, v3014); real2 v3017 = minusplus(uminus(v3013), v3014); real2 v2643 = ctimesminusplus(reverse(v2629), tbl[464 + tbloffset], ctimes(v2629, tbl[465 + tbloffset])); real2 v2723 = ctimesminusplus(reverse(v2709), tbl[480 + tbloffset], ctimes(v2709, tbl[481 + tbloffset])); real2 v3039 = plus(v2643, v2723); real2 v3033 = reverse(minus(v2643, v2723)); real2 v2683 = ctimesminusplus(reverse(v2669), tbl[472 + tbloffset], ctimes(v2669, tbl[473 + tbloffset])); real2 v3031 = ctimesminusplus(reverse(v3017), tbl[532 + tbloffset], ctimes(v3017, tbl[533 + tbloffset])); real2 v2603 = ctimesminusplus(reverse(v2589), tbl[456 + tbloffset], ctimes(v2589, tbl[457 + tbloffset])); real2 v3034 = minus(v2683, v2603); real2 v3038 = plus(v2603, v2683); real2 v3037 = minusplus(uminus(v3033), v3034); real2 v3035 = minusplus(v3033, v3034); real2 v3051 = ctimesminusplus(reverse(v3037), tbl[536 + tbloffset], ctimes(v3037, tbl[537 + tbloffset])); store(out, 63 << outShift, plus(v3031, v3051)); real2 v3084 = minus(v3031, v3051); store(out, 127 << outShift, ctimesminusplus(v3084, tbl[0 + tbloffset], ctimes(reverse(v3084), tbl[1 + tbloffset]))); real2 v3025 = ctimesminusplus(reverse(v3015), tbl[530 + tbloffset], ctimes(v3015, tbl[531 + tbloffset])); real2 v3045 = 
ctimesminusplus(reverse(v3035), tbl[534 + tbloffset], ctimes(v3035, tbl[535 + tbloffset])); store(out, 31 << outShift, plus(v3025, v3045)); real2 v3078 = minus(v3025, v3045); store(out, 95 << outShift, ctimesminusplus(v3078, tbl[0 + tbloffset], ctimes(reverse(v3078), tbl[1 + tbloffset]))); real2 v3058 = plus(v3018, v3019); real2 v3054 = minus(v3019, v3018); real2 v3053 = reverse(minus(v3038, v3039)); real2 v3059 = plus(v3038, v3039); real2 v3055 = minusplus(v3053, v3054); store(out, 47 << outShift, ctimesminusplus(reverse(v3055), tbl[538 + tbloffset], ctimes(v3055, tbl[539 + tbloffset]))); real2 v3057 = minusplus(uminus(v3053), v3054); store(out, 111 << outShift, ctimesminusplus(reverse(v3057), tbl[540 + tbloffset], ctimes(v3057, tbl[541 + tbloffset]))); store(out, 15 << outShift, plus(v3058, v3059)); real2 v3072 = minus(v3058, v3059); store(out, 79 << outShift, ctimesminusplus(v3072, tbl[0 + tbloffset], ctimes(reverse(v3072), tbl[1 + tbloffset]))); real2 v683 = ctimesminusplus(reverse(v673), tbl[110 + tbloffset], ctimes(v673, tbl[111 + tbloffset])); real2 v363 = ctimesminusplus(reverse(v353), tbl[46 + tbloffset], ctimes(v353, tbl[47 + tbloffset])); real2 v2105 = plus(v363, v683); real2 v2099 = reverse(minus(v363, v683)); real2 v283 = ctimesminusplus(reverse(v273), tbl[30 + tbloffset], ctimes(v273, tbl[31 + tbloffset])); real2 v723 = ctimesminusplus(reverse(v713), tbl[118 + tbloffset], ctimes(v713, tbl[119 + tbloffset])); real2 v403 = ctimesminusplus(reverse(v393), tbl[54 + tbloffset], ctimes(v393, tbl[55 + tbloffset])); real2 v603 = ctimesminusplus(reverse(v593), tbl[94 + tbloffset], ctimes(v593, tbl[95 + tbloffset])); real2 v2180 = minus(v603, v283); real2 v2184 = plus(v283, v603); real2 v2145 = plus(v403, v723); real2 v2139 = reverse(minus(v403, v723)); real2 v543 = ctimesminusplus(reverse(v533), tbl[82 + tbloffset], ctimes(v533, tbl[83 + tbloffset])); real2 v383 = ctimesminusplus(reverse(v373), tbl[50 + tbloffset], ctimes(v373, tbl[51 + tbloffset])); real2 v703 
= ctimesminusplus(reverse(v693), tbl[114 + tbloffset], ctimes(v693, tbl[115 + tbloffset])); real2 v2125 = plus(v383, v703); real2 v2119 = reverse(minus(v383, v703)); real2 v223 = ctimesminusplus(reverse(v213), tbl[18 + tbloffset], ctimes(v213, tbl[19 + tbloffset])); real2 v2120 = minus(v543, v223); real2 v2124 = plus(v223, v543); real2 v443 = ctimesminusplus(reverse(v433), tbl[62 + tbloffset], ctimes(v433, tbl[63 + tbloffset])); real2 v203 = ctimesminusplus(reverse(v193), tbl[14 + tbloffset], ctimes(v193, tbl[15 + tbloffset])); real2 v763 = ctimesminusplus(reverse(v753), tbl[126 + tbloffset], ctimes(v753, tbl[127 + tbloffset])); real2 v2179 = reverse(minus(v443, v763)); real2 v2185 = plus(v443, v763); real2 v523 = ctimesminusplus(reverse(v513), tbl[78 + tbloffset], ctimes(v513, tbl[79 + tbloffset])); real2 v2100 = minus(v523, v203); real2 v2104 = plus(v203, v523); real2 v2264 = plus(v2104, v2105); real2 v2260 = minus(v2105, v2104); real2 v643 = ctimesminusplus(reverse(v633), tbl[102 + tbloffset], ctimes(v633, tbl[103 + tbloffset])); real2 v2265 = plus(v2184, v2185); real2 v2259 = reverse(minus(v2184, v2185)); real2 v563 = ctimesminusplus(reverse(v553), tbl[86 + tbloffset], ctimes(v553, tbl[87 + tbloffset])); real2 v243 = ctimesminusplus(reverse(v233), tbl[22 + tbloffset], ctimes(v233, tbl[23 + tbloffset])); real2 v2144 = plus(v243, v563); real2 v2140 = minus(v563, v243); real2 v143 = ctimesminusplus(reverse(v133), tbl[2 + tbloffset], ctimes(v133, tbl[3 + tbloffset])); real2 v183 = ctimesminusplus(reverse(v173), tbl[10 + tbloffset], ctimes(v173, tbl[11 + tbloffset])); real2 v2084 = plus(v183, v503); real2 v2080 = minus(v503, v183); real2 v163 = ctimesminusplus(reverse(v153), tbl[6 + tbloffset], ctimes(v153, tbl[7 + tbloffset])); real2 v303 = ctimesminusplus(reverse(v293), tbl[34 + tbloffset], ctimes(v293, tbl[35 + tbloffset])); real2 v623 = ctimesminusplus(reverse(v613), tbl[98 + tbloffset], ctimes(v613, tbl[99 + tbloffset])); real2 v2039 = reverse(minus(v303, 
v623)); real2 v2045 = plus(v303, v623); real2 v463 = ctimesminusplus(reverse(v453), tbl[66 + tbloffset], ctimes(v453, tbl[67 + tbloffset])); real2 v2044 = plus(v143, v463); real2 v2040 = minus(v463, v143); real2 v2204 = plus(v2044, v2045); real2 v2200 = minus(v2045, v2044); real2 v323 = ctimesminusplus(reverse(v313), tbl[38 + tbloffset], ctimes(v313, tbl[39 + tbloffset])); real2 v2205 = plus(v2124, v2125); real2 v2199 = reverse(minus(v2124, v2125)); real2 v2280 = minus(v2205, v2204); real2 v2284 = plus(v2204, v2205); real2 v2225 = plus(v2144, v2145); real2 v2219 = reverse(minus(v2144, v2145)); real2 v2305 = plus(v2264, v2265); real2 v2299 = reverse(minus(v2264, v2265)); real2 v2240 = minus(v2085, v2084); real2 v2244 = plus(v2084, v2085); real2 v2279 = reverse(minus(v2244, v2245)); real2 v2285 = plus(v2244, v2245); real2 v2281 = minusplus(v2279, v2280); real2 v2283 = minusplus(uminus(v2279), v2280); real2 v2291 = ctimesminusplus(reverse(v2281), tbl[406 + tbloffset], ctimes(v2281, tbl[407 + tbloffset])); real2 v483 = ctimesminusplus(reverse(v473), tbl[70 + tbloffset], ctimes(v473, tbl[71 + tbloffset])); real2 v2060 = minus(v483, v163); real2 v2064 = plus(v163, v483); real2 v2065 = plus(v323, v643); real2 v2059 = reverse(minus(v323, v643)); real2 v2220 = minus(v2065, v2064); real2 v2224 = plus(v2064, v2065); real2 v2304 = plus(v2224, v2225); real2 v2300 = minus(v2225, v2224); real2 v2301 = minusplus(v2299, v2300); real2 v2303 = minusplus(uminus(v2299), v2300); real2 v2311 = ctimesminusplus(reverse(v2301), tbl[410 + tbloffset], ctimes(v2301, tbl[411 + tbloffset])); store(out, 17 << outShift, plus(v2291, v2311)); real2 v2344 = minus(v2291, v2311); store(out, 81 << outShift, ctimesminusplus(v2344, tbl[0 + tbloffset], ctimes(reverse(v2344), tbl[1 + tbloffset]))); real2 v2297 = ctimesminusplus(reverse(v2283), tbl[408 + tbloffset], ctimes(v2283, tbl[409 + tbloffset])); real2 v2317 = ctimesminusplus(reverse(v2303), tbl[412 + tbloffset], ctimes(v2303, tbl[413 + tbloffset])); 
store(out, 49 << outShift, plus(v2297, v2317)); real2 v2350 = minus(v2297, v2317); store(out, 113 << outShift, ctimesminusplus(v2350, tbl[0 + tbloffset], ctimes(reverse(v2350), tbl[1 + tbloffset]))); real2 v2320 = minus(v2285, v2284); real2 v2324 = plus(v2284, v2285); real2 v2325 = plus(v2304, v2305); real2 v2319 = reverse(minus(v2304, v2305)); store(out, 1 << outShift, plus(v2324, v2325)); real2 v2338 = minus(v2324, v2325); store(out, 65 << outShift, ctimesminusplus(v2338, tbl[0 + tbloffset], ctimes(reverse(v2338), tbl[1 + tbloffset]))); real2 v2321 = minusplus(v2319, v2320); store(out, 33 << outShift, ctimesminusplus(reverse(v2321), tbl[414 + tbloffset], ctimes(v2321, tbl[415 + tbloffset]))); real2 v2323 = minusplus(uminus(v2319), v2320); store(out, 97 << outShift, ctimesminusplus(reverse(v2323), tbl[416 + tbloffset], ctimes(v2323, tbl[417 + tbloffset]))); real2 v2201 = minusplus(v2199, v2200); real2 v2203 = minusplus(uminus(v2199), v2200); real2 v2263 = minusplus(uminus(v2259), v2260); real2 v2261 = minusplus(v2259, v2260); real2 v2243 = minusplus(uminus(v2239), v2240); real2 v2241 = minusplus(v2239, v2240); real2 v2257 = ctimesminusplus(reverse(v2243), tbl[400 + tbloffset], ctimes(v2243, tbl[401 + tbloffset])); real2 v2217 = ctimesminusplus(reverse(v2203), tbl[392 + tbloffset], ctimes(v2203, tbl[393 + tbloffset])); real2 v2388 = plus(v2217, v2257); real2 v2384 = minus(v2257, v2217); real2 v2277 = ctimesminusplus(reverse(v2263), tbl[404 + tbloffset], ctimes(v2263, tbl[405 + tbloffset])); real2 v2221 = minusplus(v2219, v2220); real2 v2223 = minusplus(uminus(v2219), v2220); real2 v2237 = ctimesminusplus(reverse(v2223), tbl[396 + tbloffset], ctimes(v2223, tbl[397 + tbloffset])); real2 v2389 = plus(v2237, v2277); real2 v2383 = reverse(minus(v2237, v2277)); store(out, 25 << outShift, plus(v2388, v2389)); real2 v2402 = minus(v2388, v2389); store(out, 89 << outShift, ctimesminusplus(v2402, tbl[0 + tbloffset], ctimes(reverse(v2402), tbl[1 + tbloffset]))); real2 v2385 = 
minusplus(v2383, v2384); real2 v2387 = minusplus(uminus(v2383), v2384); store(out, 121 << outShift, ctimesminusplus(reverse(v2387), tbl[424 + tbloffset], ctimes(v2387, tbl[425 + tbloffset]))); store(out, 57 << outShift, ctimesminusplus(reverse(v2385), tbl[422 + tbloffset], ctimes(v2385, tbl[423 + tbloffset]))); real2 v2251 = ctimesminusplus(reverse(v2241), tbl[398 + tbloffset], ctimes(v2241, tbl[399 + tbloffset])); real2 v2211 = ctimesminusplus(reverse(v2201), tbl[390 + tbloffset], ctimes(v2201, tbl[391 + tbloffset])); real2 v2358 = minus(v2251, v2211); real2 v2362 = plus(v2211, v2251); real2 v2271 = ctimesminusplus(reverse(v2261), tbl[402 + tbloffset], ctimes(v2261, tbl[403 + tbloffset])); real2 v2231 = ctimesminusplus(reverse(v2221), tbl[394 + tbloffset], ctimes(v2221, tbl[395 + tbloffset])); real2 v2357 = reverse(minus(v2231, v2271)); real2 v2363 = plus(v2231, v2271); store(out, 9 << outShift, plus(v2362, v2363)); real2 v2376 = minus(v2362, v2363); store(out, 73 << outShift, ctimesminusplus(v2376, tbl[0 + tbloffset], ctimes(reverse(v2376), tbl[1 + tbloffset]))); real2 v2361 = minusplus(uminus(v2357), v2358); store(out, 105 << outShift, ctimesminusplus(reverse(v2361), tbl[420 + tbloffset], ctimes(v2361, tbl[421 + tbloffset]))); real2 v2359 = minusplus(v2357, v2358); store(out, 41 << outShift, ctimesminusplus(reverse(v2359), tbl[418 + tbloffset], ctimes(v2359, tbl[419 + tbloffset]))); real2 v2121 = minusplus(v2119, v2120); real2 v2123 = minusplus(uminus(v2119), v2120); real2 v2083 = minusplus(uminus(v2079), v2080); real2 v2081 = minusplus(v2079, v2080); real2 v2091 = ctimesminusplus(reverse(v2081), tbl[366 + tbloffset], ctimes(v2081, tbl[367 + tbloffset])); real2 v2043 = minusplus(uminus(v2039), v2040); real2 v2041 = minusplus(v2039, v2040); real2 v2051 = ctimesminusplus(reverse(v2041), tbl[358 + tbloffset], ctimes(v2041, tbl[359 + tbloffset])); real2 v2131 = ctimesminusplus(reverse(v2121), tbl[374 + tbloffset], ctimes(v2121, tbl[375 + tbloffset])); real2 v2163 = 
minusplus(uminus(v2159), v2160); real2 v2161 = minusplus(v2159, v2160); real2 v2171 = ctimesminusplus(reverse(v2161), tbl[382 + tbloffset], ctimes(v2161, tbl[383 + tbloffset])); real2 v2409 = reverse(minus(v2091, v2171)); real2 v2415 = plus(v2091, v2171); real2 v2410 = minus(v2131, v2051); real2 v2414 = plus(v2051, v2131); real2 v2454 = plus(v2414, v2415); real2 v2450 = minus(v2415, v2414); real2 v2181 = minusplus(v2179, v2180); real2 v2183 = minusplus(uminus(v2179), v2180); real2 v2191 = ctimesminusplus(reverse(v2181), tbl[386 + tbloffset], ctimes(v2181, tbl[387 + tbloffset])); real2 v2103 = minusplus(uminus(v2099), v2100); real2 v2101 = minusplus(v2099, v2100); real2 v2111 = ctimesminusplus(reverse(v2101), tbl[370 + tbloffset], ctimes(v2101, tbl[371 + tbloffset])); real2 v2435 = plus(v2111, v2191); real2 v2429 = reverse(minus(v2111, v2191)); real2 v2141 = minusplus(v2139, v2140); real2 v2143 = minusplus(uminus(v2139), v2140); real2 v2151 = ctimesminusplus(reverse(v2141), tbl[378 + tbloffset], ctimes(v2141, tbl[379 + tbloffset])); real2 v2063 = minusplus(uminus(v2059), v2060); real2 v2061 = minusplus(v2059, v2060); real2 v2071 = ctimesminusplus(reverse(v2061), tbl[362 + tbloffset], ctimes(v2061, tbl[363 + tbloffset])); real2 v2434 = plus(v2071, v2151); real2 v2430 = minus(v2151, v2071); real2 v2455 = plus(v2434, v2435); real2 v2449 = reverse(minus(v2434, v2435)); store(out, 5 << outShift, plus(v2454, v2455)); real2 v2468 = minus(v2454, v2455); store(out, 69 << outShift, ctimesminusplus(v2468, tbl[0 + tbloffset], ctimes(reverse(v2468), tbl[1 + tbloffset]))); real2 v2451 = minusplus(v2449, v2450); real2 v2453 = minusplus(uminus(v2449), v2450); store(out, 101 << outShift, ctimesminusplus(reverse(v2453), tbl[436 + tbloffset], ctimes(v2453, tbl[437 + tbloffset]))); store(out, 37 << outShift, ctimesminusplus(reverse(v2451), tbl[434 + tbloffset], ctimes(v2451, tbl[435 + tbloffset]))); real2 v2411 = minusplus(v2409, v2410); real2 v2413 = minusplus(uminus(v2409), v2410); 
real2 v2433 = minusplus(uminus(v2429), v2430); real2 v2431 = minusplus(v2429, v2430); real2 v2421 = ctimesminusplus(reverse(v2411), tbl[426 + tbloffset], ctimes(v2411, tbl[427 + tbloffset])); real2 v2441 = ctimesminusplus(reverse(v2431), tbl[430 + tbloffset], ctimes(v2431, tbl[431 + tbloffset])); store(out, 21 << outShift, plus(v2421, v2441)); real2 v2474 = minus(v2421, v2441); store(out, 85 << outShift, ctimesminusplus(v2474, tbl[0 + tbloffset], ctimes(reverse(v2474), tbl[1 + tbloffset]))); real2 v2427 = ctimesminusplus(reverse(v2413), tbl[428 + tbloffset], ctimes(v2413, tbl[429 + tbloffset])); real2 v2447 = ctimesminusplus(reverse(v2433), tbl[432 + tbloffset], ctimes(v2433, tbl[433 + tbloffset])); store(out, 53 << outShift, plus(v2427, v2447)); real2 v2480 = minus(v2427, v2447); store(out, 117 << outShift, ctimesminusplus(v2480, tbl[0 + tbloffset], ctimes(reverse(v2480), tbl[1 + tbloffset]))); real2 v2057 = ctimesminusplus(reverse(v2043), tbl[360 + tbloffset], ctimes(v2043, tbl[361 + tbloffset])); real2 v2097 = ctimesminusplus(reverse(v2083), tbl[368 + tbloffset], ctimes(v2083, tbl[369 + tbloffset])); real2 v2157 = ctimesminusplus(reverse(v2143), tbl[380 + tbloffset], ctimes(v2143, tbl[381 + tbloffset])); real2 v2197 = ctimesminusplus(reverse(v2183), tbl[388 + tbloffset], ctimes(v2183, tbl[389 + tbloffset])); real2 v2117 = ctimesminusplus(reverse(v2103), tbl[372 + tbloffset], ctimes(v2103, tbl[373 + tbloffset])); real2 v2507 = reverse(minus(v2117, v2197)); real2 v2513 = plus(v2117, v2197); real2 v2137 = ctimesminusplus(reverse(v2123), tbl[376 + tbloffset], ctimes(v2123, tbl[377 + tbloffset])); real2 v2488 = minus(v2137, v2057); real2 v2492 = plus(v2057, v2137); real2 v2177 = ctimesminusplus(reverse(v2163), tbl[384 + tbloffset], ctimes(v2163, tbl[385 + tbloffset])); real2 v2493 = plus(v2097, v2177); real2 v2487 = reverse(minus(v2097, v2177)); real2 v2532 = plus(v2492, v2493); real2 v2528 = minus(v2493, v2492); real2 v2077 = ctimesminusplus(reverse(v2063), tbl[364 
+ tbloffset], ctimes(v2063, tbl[365 + tbloffset])); real2 v2512 = plus(v2077, v2157); real2 v2508 = minus(v2157, v2077); real2 v2527 = reverse(minus(v2512, v2513)); real2 v2533 = plus(v2512, v2513); real2 v2529 = minusplus(v2527, v2528); real2 v2531 = minusplus(uminus(v2527), v2528); store(out, 109 << outShift, ctimesminusplus(reverse(v2531), tbl[448 + tbloffset], ctimes(v2531, tbl[449 + tbloffset]))); store(out, 45 << outShift, ctimesminusplus(reverse(v2529), tbl[446 + tbloffset], ctimes(v2529, tbl[447 + tbloffset]))); store(out, 13 << outShift, plus(v2532, v2533)); real2 v2546 = minus(v2532, v2533); store(out, 77 << outShift, ctimesminusplus(v2546, tbl[0 + tbloffset], ctimes(reverse(v2546), tbl[1 + tbloffset]))); real2 v2509 = minusplus(v2507, v2508); real2 v2511 = minusplus(uminus(v2507), v2508); real2 v2491 = minusplus(uminus(v2487), v2488); real2 v2489 = minusplus(v2487, v2488); real2 v2499 = ctimesminusplus(reverse(v2489), tbl[438 + tbloffset], ctimes(v2489, tbl[439 + tbloffset])); real2 v2519 = ctimesminusplus(reverse(v2509), tbl[442 + tbloffset], ctimes(v2509, tbl[443 + tbloffset])); store(out, 29 << outShift, plus(v2499, v2519)); real2 v2552 = minus(v2499, v2519); store(out, 93 << outShift, ctimesminusplus(v2552, tbl[0 + tbloffset], ctimes(reverse(v2552), tbl[1 + tbloffset]))); real2 v2505 = ctimesminusplus(reverse(v2491), tbl[440 + tbloffset], ctimes(v2491, tbl[441 + tbloffset])); real2 v2525 = ctimesminusplus(reverse(v2511), tbl[444 + tbloffset], ctimes(v2511, tbl[445 + tbloffset])); store(out, 61 << outShift, plus(v2505, v2525)); real2 v2558 = minus(v2505, v2525); store(out, 125 << outShift, ctimesminusplus(v2558, tbl[0 + tbloffset], ctimes(reverse(v2558), tbl[1 + tbloffset]))); // Pres : 76263 } } ALIGNED(8192) void but128b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const int outShift, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for 
for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * (i0 >> outShift); // Pres : 148586 real2 v56 = load(in, 54 << inShift); real2 v120 = load(in, 118 << inShift); real2 v571 = reverse(minus(v120, v56)); real2 v577 = plus(v56, v120); real2 v24 = load(in, 22 << inShift); real2 v88 = load(in, 86 << inShift); real2 v576 = plus(v24, v88); real2 v572 = minus(v88, v24); real2 v573 = minusplus(v571, v572); real2 v575 = minusplus(uminus(v571), v572); real2 v589 = ctimesminusplus(reverse(v575), tbl[92 + tbloffset], ctimes(v575, tbl[93 + tbloffset])); real2 v583 = ctimesminusplus(reverse(v573), tbl[90 + tbloffset], ctimes(v573, tbl[91 + tbloffset])); real2 v897 = plus(v576, v577); real2 v891 = reverse(minus(v577, v576)); real2 v8 = load(in, 6 << inShift); real2 v72 = load(in, 70 << inShift); real2 v252 = minus(v72, v8); real2 v256 = plus(v8, v72); real2 v104 = load(in, 102 << inShift); real2 v40 = load(in, 38 << inShift); real2 v251 = reverse(minus(v104, v40)); real2 v257 = plus(v40, v104); real2 v255 = minusplus(uminus(v251), v252); real2 v253 = minusplus(v251, v252); real2 v263 = ctimesminusplus(reverse(v253), tbl[26 + tbloffset], ctimes(v253, tbl[27 + tbloffset])); real2 v896 = plus(v256, v257); real2 v892 = minus(v257, v256); real2 v895 = minusplus(uminus(v891), v892); real2 v893 = minusplus(v891, v892); real2 v909 = ctimesminusplus(reverse(v895), tbl[156 + tbloffset], ctimes(v895, tbl[157 + tbloffset])); real2 v903 = ctimesminusplus(reverse(v893), tbl[154 + tbloffset], ctimes(v893, tbl[155 + tbloffset])); real2 v269 = ctimesminusplus(reverse(v255), tbl[28 + tbloffset], ctimes(v255, tbl[29 + tbloffset])); real2 v1216 = plus(v896, v897); real2 v1212 = minus(v897, v896); real2 v2160 = minus(v583, v263); real2 v2164 = plus(v263, v583); real2 v2686 = minus(v589, v269); real2 v2690 = plus(v269, v589); real2 v96 = load(in, 94 << inShift); real2 v32 = load(in, 30 << inShift); real2 v736 = plus(v32, 
v96); real2 v732 = minus(v96, v32); real2 v64 = load(in, 62 << inShift); real2 v128 = load(in, 126 << inShift); real2 v737 = plus(v64, v128); real2 v731 = reverse(minus(v128, v64)); real2 v1057 = plus(v736, v737); real2 v1051 = reverse(minus(v737, v736)); real2 v733 = minusplus(v731, v732); real2 v735 = minusplus(uminus(v731), v732); real2 v749 = ctimesminusplus(reverse(v735), tbl[124 + tbloffset], ctimes(v735, tbl[125 + tbloffset])); real2 v743 = ctimesminusplus(reverse(v733), tbl[122 + tbloffset], ctimes(v733, tbl[123 + tbloffset])); real2 v16 = load(in, 14 << inShift); real2 v80 = load(in, 78 << inShift); real2 v412 = minus(v80, v16); real2 v416 = plus(v16, v80); real2 v112 = load(in, 110 << inShift); real2 v48 = load(in, 46 << inShift); real2 v417 = plus(v48, v112); real2 v411 = reverse(minus(v112, v48)); real2 v1056 = plus(v416, v417); real2 v1052 = minus(v417, v416); real2 v1055 = minusplus(uminus(v1051), v1052); real2 v1053 = minusplus(v1051, v1052); real2 v1063 = ctimesminusplus(reverse(v1053), tbl[186 + tbloffset], ctimes(v1053, tbl[187 + tbloffset])); real2 v1665 = plus(v903, v1063); real2 v1659 = reverse(minus(v1063, v903)); real2 v1069 = ctimesminusplus(reverse(v1055), tbl[188 + tbloffset], ctimes(v1055, tbl[189 + tbloffset])); real2 v1869 = reverse(minus(v1069, v909)); real2 v1875 = plus(v909, v1069); real2 v413 = minusplus(v411, v412); real2 v415 = minusplus(uminus(v411), v412); real2 v429 = ctimesminusplus(reverse(v415), tbl[60 + tbloffset], ctimes(v415, tbl[61 + tbloffset])); real2 v1217 = plus(v1056, v1057); real2 v1211 = reverse(minus(v1057, v1056)); real2 v1297 = plus(v1216, v1217); real2 v1291 = reverse(minus(v1217, v1216)); real2 v2691 = plus(v429, v749); real2 v2685 = reverse(minus(v749, v429)); real2 v2765 = reverse(minus(v2691, v2690)); real2 v2771 = plus(v2690, v2691); real2 v2689 = minusplus(uminus(v2685), v2686); real2 v2687 = minusplus(v2685, v2686); real2 v2703 = ctimesminusplus(reverse(v2689), tbl[476 + tbloffset], ctimes(v2689, 
tbl[477 + tbloffset])); real2 v2697 = ctimesminusplus(reverse(v2687), tbl[474 + tbloffset], ctimes(v2687, tbl[475 + tbloffset])); real2 v1215 = minusplus(uminus(v1211), v1212); real2 v1213 = minusplus(v1211, v1212); real2 v1223 = ctimesminusplus(reverse(v1213), tbl[218 + tbloffset], ctimes(v1213, tbl[219 + tbloffset])); real2 v1229 = ctimesminusplus(reverse(v1215), tbl[220 + tbloffset], ctimes(v1215, tbl[221 + tbloffset])); real2 v423 = ctimesminusplus(reverse(v413), tbl[58 + tbloffset], ctimes(v413, tbl[59 + tbloffset])); real2 v2165 = plus(v423, v743); real2 v2159 = reverse(minus(v743, v423)); real2 v2245 = plus(v2164, v2165); real2 v2239 = reverse(minus(v2165, v2164)); real2 v44 = load(in, 42 << inShift); real2 v108 = load(in, 106 << inShift); real2 v331 = reverse(minus(v108, v44)); real2 v337 = plus(v44, v108); real2 v76 = load(in, 74 << inShift); real2 v12 = load(in, 10 << inShift); real2 v336 = plus(v12, v76); real2 v332 = minus(v76, v12); real2 v976 = plus(v336, v337); real2 v972 = minus(v337, v336); real2 v335 = minusplus(uminus(v331), v332); real2 v333 = minusplus(v331, v332); real2 v343 = ctimesminusplus(reverse(v333), tbl[42 + tbloffset], ctimes(v333, tbl[43 + tbloffset])); real2 v349 = ctimesminusplus(reverse(v335), tbl[44 + tbloffset], ctimes(v335, tbl[45 + tbloffset])); real2 v124 = load(in, 122 << inShift); real2 v60 = load(in, 58 << inShift); real2 v651 = reverse(minus(v124, v60)); real2 v657 = plus(v60, v124); real2 v28 = load(in, 26 << inShift); real2 v92 = load(in, 90 << inShift); real2 v652 = minus(v92, v28); real2 v656 = plus(v28, v92); real2 v977 = plus(v656, v657); real2 v971 = reverse(minus(v657, v656)); real2 v973 = minusplus(v971, v972); real2 v975 = minusplus(uminus(v971), v972); real2 v983 = ctimesminusplus(reverse(v973), tbl[170 + tbloffset], ctimes(v973, tbl[171 + tbloffset])); real2 v1131 = reverse(minus(v977, v976)); real2 v1137 = plus(v976, v977); real2 v655 = minusplus(uminus(v651), v652); real2 v653 = minusplus(v651, v652); real2 
v669 = ctimesminusplus(reverse(v655), tbl[108 + tbloffset], ctimes(v655, tbl[109 + tbloffset])); real2 v663 = ctimesminusplus(reverse(v653), tbl[106 + tbloffset], ctimes(v653, tbl[107 + tbloffset])); real2 v2079 = reverse(minus(v663, v343)); real2 v2085 = plus(v343, v663); real2 v2605 = reverse(minus(v669, v349)); real2 v2611 = plus(v349, v669); real2 v989 = ctimesminusplus(reverse(v975), tbl[172 + tbloffset], ctimes(v975, tbl[173 + tbloffset])); real2 v20 = load(in, 18 << inShift); real2 v84 = load(in, 82 << inShift); real2 v496 = plus(v20, v84); real2 v492 = minus(v84, v20); real2 v52 = load(in, 50 << inShift); real2 v116 = load(in, 114 << inShift); real2 v491 = reverse(minus(v116, v52)); real2 v497 = plus(v52, v116); real2 v817 = plus(v496, v497); real2 v811 = reverse(minus(v497, v496)); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v509 = ctimesminusplus(reverse(v495), tbl[76 + tbloffset], ctimes(v495, tbl[77 + tbloffset])); real2 v503 = ctimesminusplus(reverse(v493), tbl[74 + tbloffset], ctimes(v493, tbl[75 + tbloffset])); real2 v36 = load(in, 34 << inShift); real2 v100 = load(in, 98 << inShift); real2 v171 = reverse(minus(v100, v36)); real2 v177 = plus(v36, v100); real2 v68 = load(in, 66 << inShift); real2 v4 = load(in, 2 << inShift); real2 v176 = plus(v4, v68); real2 v172 = minus(v68, v4); real2 v816 = plus(v176, v177); real2 v812 = minus(v177, v176); real2 v1136 = plus(v816, v817); real2 v1132 = minus(v817, v816); real2 v1133 = minusplus(v1131, v1132); real2 v1135 = minusplus(uminus(v1131), v1132); real2 v1149 = ctimesminusplus(reverse(v1135), tbl[204 + tbloffset], ctimes(v1135, tbl[205 + tbloffset])); real2 v1296 = plus(v1136, v1137); real2 v1292 = minus(v1137, v1136); real2 v1295 = minusplus(uminus(v1291), v1292); real2 v1293 = minusplus(v1291, v1292); real2 v1303 = ctimesminusplus(reverse(v1293), tbl[234 + tbloffset], ctimes(v1293, tbl[235 + tbloffset])); real2 v1331 = reverse(minus(v1297, v1296)); real2 v1337 = 
plus(v1296, v1297); real2 v173 = minusplus(v171, v172); real2 v175 = minusplus(uminus(v171), v172); real2 v189 = ctimesminusplus(reverse(v175), tbl[12 + tbloffset], ctimes(v175, tbl[13 + tbloffset])); real2 v1309 = ctimesminusplus(reverse(v1295), tbl[236 + tbloffset], ctimes(v1295, tbl[237 + tbloffset])); real2 v815 = minusplus(uminus(v811), v812); real2 v813 = minusplus(v811, v812); real2 v1143 = ctimesminusplus(reverse(v1133), tbl[202 + tbloffset], ctimes(v1133, tbl[203 + tbloffset])); real2 v1541 = reverse(minus(v1229, v1149)); real2 v1547 = plus(v1149, v1229); real2 v2610 = plus(v189, v509); real2 v2606 = minus(v509, v189); real2 v2770 = plus(v2610, v2611); real2 v2766 = minus(v2611, v2610); real2 v823 = ctimesminusplus(reverse(v813), tbl[138 + tbloffset], ctimes(v813, tbl[139 + tbloffset])); real2 v829 = ctimesminusplus(reverse(v815), tbl[140 + tbloffset], ctimes(v815, tbl[141 + tbloffset])); real2 v2811 = plus(v2770, v2771); real2 v2805 = reverse(minus(v2771, v2770)); real2 v2767 = minusplus(v2765, v2766); real2 v2769 = minusplus(uminus(v2765), v2766); real2 v2607 = minusplus(v2605, v2606); real2 v2609 = minusplus(uminus(v2605), v2606); real2 v2617 = ctimesminusplus(reverse(v2607), tbl[458 + tbloffset], ctimes(v2607, tbl[459 + tbloffset])); real2 v2623 = ctimesminusplus(reverse(v2609), tbl[460 + tbloffset], ctimes(v2609, tbl[461 + tbloffset])); real2 v3013 = reverse(minus(v2703, v2623)); real2 v3019 = plus(v2623, v2703); real2 v2783 = ctimesminusplus(reverse(v2769), tbl[492 + tbloffset], ctimes(v2769, tbl[493 + tbloffset])); real2 v2941 = plus(v2617, v2697); real2 v2935 = reverse(minus(v2697, v2617)); real2 v2777 = ctimesminusplus(reverse(v2767), tbl[490 + tbloffset], ctimes(v2767, tbl[491 + tbloffset])); real2 v1660 = minus(v983, v823); real2 v1664 = plus(v823, v983); real2 v1874 = plus(v829, v989); real2 v1870 = minus(v989, v829); real2 v1909 = reverse(minus(v1875, v1874)); real2 v1915 = plus(v1874, v1875); real2 v1663 = minusplus(uminus(v1659), v1660); 
real2 v1661 = minusplus(v1659, v1660); real2 v1677 = ctimesminusplus(reverse(v1663), tbl[296 + tbloffset], ctimes(v1663, tbl[297 + tbloffset])); real2 v1873 = minusplus(uminus(v1869), v1870); real2 v1871 = minusplus(v1869, v1870); real2 v1887 = ctimesminusplus(reverse(v1873), tbl[332 + tbloffset], ctimes(v1873, tbl[333 + tbloffset])); real2 v1705 = plus(v1664, v1665); real2 v1699 = reverse(minus(v1665, v1664)); real2 v1671 = ctimesminusplus(reverse(v1661), tbl[294 + tbloffset], ctimes(v1661, tbl[295 + tbloffset])); real2 v1881 = ctimesminusplus(reverse(v1871), tbl[330 + tbloffset], ctimes(v1871, tbl[331 + tbloffset])); real2 v1469 = plus(v1143, v1223); real2 v1463 = reverse(minus(v1223, v1143)); real2 v54 = load(in, 52 << inShift); real2 v118 = load(in, 116 << inShift); real2 v537 = plus(v54, v118); real2 v531 = reverse(minus(v118, v54)); real2 v86 = load(in, 84 << inShift); real2 v22 = load(in, 20 << inShift); real2 v536 = plus(v22, v86); real2 v532 = minus(v86, v22); real2 v851 = reverse(minus(v537, v536)); real2 v857 = plus(v536, v537); real2 v533 = minusplus(v531, v532); real2 v535 = minusplus(uminus(v531), v532); real2 v549 = ctimesminusplus(reverse(v535), tbl[84 + tbloffset], ctimes(v535, tbl[85 + tbloffset])); real2 v102 = load(in, 100 << inShift); real2 v38 = load(in, 36 << inShift); real2 v217 = plus(v38, v102); real2 v211 = reverse(minus(v102, v38)); real2 v70 = load(in, 68 << inShift); real2 v6 = load(in, 4 << inShift); real2 v216 = plus(v6, v70); real2 v212 = minus(v70, v6); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v229 = ctimesminusplus(reverse(v215), tbl[20 + tbloffset], ctimes(v215, tbl[21 + tbloffset])); real2 v2646 = minus(v549, v229); real2 v2650 = plus(v229, v549); real2 v856 = plus(v216, v217); real2 v852 = minus(v217, v216); real2 v853 = minusplus(v851, v852); real2 v855 = minusplus(uminus(v851), v852); real2 v863 = ctimesminusplus(reverse(v853), tbl[146 + tbloffset], ctimes(v853, tbl[147 + 
tbloffset])); real2 v869 = ctimesminusplus(reverse(v855), tbl[148 + tbloffset], ctimes(v855, tbl[149 + tbloffset])); real2 v1176 = plus(v856, v857); real2 v1172 = minus(v857, v856); real2 v110 = load(in, 108 << inShift); real2 v46 = load(in, 44 << inShift); real2 v377 = plus(v46, v110); real2 v371 = reverse(minus(v110, v46)); real2 v78 = load(in, 76 << inShift); real2 v14 = load(in, 12 << inShift); real2 v372 = minus(v78, v14); real2 v376 = plus(v14, v78); real2 v1012 = minus(v377, v376); real2 v1016 = plus(v376, v377); real2 v373 = minusplus(v371, v372); real2 v375 = minusplus(uminus(v371), v372); real2 v389 = ctimesminusplus(reverse(v375), tbl[52 + tbloffset], ctimes(v375, tbl[53 + tbloffset])); real2 v30 = load(in, 28 << inShift); real2 v94 = load(in, 92 << inShift); real2 v696 = plus(v30, v94); real2 v692 = minus(v94, v30); real2 v62 = load(in, 60 << inShift); real2 v126 = load(in, 124 << inShift); real2 v697 = plus(v62, v126); real2 v691 = reverse(minus(v126, v62)); real2 v1017 = plus(v696, v697); real2 v1011 = reverse(minus(v697, v696)); real2 v1171 = reverse(minus(v1017, v1016)); real2 v1177 = plus(v1016, v1017); real2 v1013 = minusplus(v1011, v1012); real2 v1015 = minusplus(uminus(v1011), v1012); real2 v1175 = minusplus(uminus(v1171), v1172); real2 v1173 = minusplus(v1171, v1172); real2 v1183 = ctimesminusplus(reverse(v1173), tbl[210 + tbloffset], ctimes(v1173, tbl[211 + tbloffset])); real2 v1189 = ctimesminusplus(reverse(v1175), tbl[212 + tbloffset], ctimes(v1175, tbl[213 + tbloffset])); real2 v1029 = ctimesminusplus(reverse(v1015), tbl[180 + tbloffset], ctimes(v1015, tbl[181 + tbloffset])); real2 v1023 = ctimesminusplus(reverse(v1013), tbl[178 + tbloffset], ctimes(v1013, tbl[179 + tbloffset])); real2 v1625 = plus(v863, v1023); real2 v1619 = reverse(minus(v1023, v863)); real2 v1835 = plus(v869, v1029); real2 v1829 = reverse(minus(v1029, v869)); real2 v693 = minusplus(v691, v692); real2 v695 = minusplus(uminus(v691), v692); real2 v709 = 
ctimesminusplus(reverse(v695), tbl[116 + tbloffset], ctimes(v695, tbl[117 + tbloffset])); real2 v2645 = reverse(minus(v709, v389)); real2 v2651 = plus(v389, v709); real2 v1257 = plus(v1176, v1177); real2 v1251 = reverse(minus(v1177, v1176)); real2 v2731 = plus(v2650, v2651); real2 v2725 = reverse(minus(v2651, v2650)); real2 v114 = load(in, 112 << inShift); real2 v50 = load(in, 48 << inShift); real2 v457 = plus(v50, v114); real2 v451 = reverse(minus(v114, v50)); real2 v18 = load(in, 16 << inShift); real2 v82 = load(in, 80 << inShift); real2 v456 = plus(v18, v82); real2 v452 = minus(v82, v18); real2 v771 = reverse(minus(v457, v456)); real2 v777 = plus(v456, v457); real2 v453 = minusplus(v451, v452); real2 v455 = minusplus(uminus(v451), v452); real2 v469 = ctimesminusplus(reverse(v455), tbl[68 + tbloffset], ctimes(v455, tbl[69 + tbloffset])); real2 v66 = load(in, 64 << inShift); real2 v2 = load(in, 0 << inShift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << inShift); real2 v34 = load(in, 32 << inShift); real2 v131 = reverse(minus(v98, v34)); real2 v137 = plus(v34, v98); real2 v133 = minusplus(v131, v132); real2 v135 = minusplus(uminus(v131), v132); real2 v149 = ctimesminusplus(reverse(v135), tbl[4 + tbloffset], ctimes(v135, tbl[5 + tbloffset])); real2 v2566 = minus(v469, v149); real2 v2570 = plus(v149, v469); real2 v772 = minus(v137, v136); real2 v776 = plus(v136, v137); real2 v1092 = minus(v777, v776); real2 v1096 = plus(v776, v777); real2 v773 = minusplus(v771, v772); real2 v775 = minusplus(uminus(v771), v772); real2 v783 = ctimesminusplus(reverse(v773), tbl[130 + tbloffset], ctimes(v773, tbl[131 + tbloffset])); real2 v789 = ctimesminusplus(reverse(v775), tbl[132 + tbloffset], ctimes(v775, tbl[133 + tbloffset])); real2 v74 = load(in, 72 << inShift); real2 v10 = load(in, 8 << inShift); real2 v296 = plus(v10, v74); real2 v292 = minus(v74, v10); real2 v42 = load(in, 40 << inShift); real2 v106 = load(in, 104 << inShift); real2 v291 
= reverse(minus(v106, v42)); real2 v297 = plus(v42, v106); real2 v293 = minusplus(v291, v292); real2 v295 = minusplus(uminus(v291), v292); real2 v309 = ctimesminusplus(reverse(v295), tbl[36 + tbloffset], ctimes(v295, tbl[37 + tbloffset])); real2 v932 = minus(v297, v296); real2 v936 = plus(v296, v297); real2 v122 = load(in, 120 << inShift); real2 v58 = load(in, 56 << inShift); real2 v617 = plus(v58, v122); real2 v611 = reverse(minus(v122, v58)); real2 v26 = load(in, 24 << inShift); real2 v90 = load(in, 88 << inShift); real2 v612 = minus(v90, v26); real2 v616 = plus(v26, v90); real2 v937 = plus(v616, v617); real2 v931 = reverse(minus(v617, v616)); real2 v1091 = reverse(minus(v937, v936)); real2 v1097 = plus(v936, v937); real2 v933 = minusplus(v931, v932); real2 v935 = minusplus(uminus(v931), v932); real2 v1093 = minusplus(v1091, v1092); real2 v1095 = minusplus(uminus(v1091), v1092); real2 v1103 = ctimesminusplus(reverse(v1093), tbl[194 + tbloffset], ctimes(v1093, tbl[195 + tbloffset])); real2 v1468 = plus(v1103, v1183); real2 v1464 = minus(v1183, v1103); real2 v1508 = plus(v1468, v1469); real2 v1504 = minus(v1469, v1468); real2 v1252 = minus(v1097, v1096); real2 v1256 = plus(v1096, v1097); real2 v1336 = plus(v1256, v1257); real2 v1332 = minus(v1257, v1256); real2 v1335 = minusplus(uminus(v1331), v1332); real2 v1333 = minusplus(v1331, v1332); real2 v1343 = ctimesminusplus(reverse(v1333), tbl[242 + tbloffset], ctimes(v1333, tbl[243 + tbloffset])); real2 v1349 = ctimesminusplus(reverse(v1335), tbl[244 + tbloffset], ctimes(v1335, tbl[245 + tbloffset])); real2 v1376 = plus(v1336, v1337); real2 v1372 = minus(v1337, v1336); real2 v1465 = minusplus(v1463, v1464); real2 v1467 = minusplus(uminus(v1463), v1464); real2 v1255 = minusplus(uminus(v1251), v1252); real2 v1253 = minusplus(v1251, v1252); real2 v1481 = ctimesminusplus(reverse(v1467), tbl[264 + tbloffset], ctimes(v1467, tbl[265 + tbloffset])); real2 v1475 = ctimesminusplus(reverse(v1465), tbl[262 + tbloffset], 
ctimes(v1465, tbl[263 + tbloffset])); real2 v1109 = ctimesminusplus(reverse(v1095), tbl[196 + tbloffset], ctimes(v1095, tbl[197 + tbloffset])); real2 v1542 = minus(v1189, v1109); real2 v1546 = plus(v1109, v1189); real2 v1545 = minusplus(uminus(v1541), v1542); real2 v1543 = minusplus(v1541, v1542); real2 v1553 = ctimesminusplus(reverse(v1543), tbl[274 + tbloffset], ctimes(v1543, tbl[275 + tbloffset])); real2 v1559 = ctimesminusplus(reverse(v1545), tbl[276 + tbloffset], ctimes(v1545, tbl[277 + tbloffset])); real2 v1582 = minus(v1547, v1546); real2 v1586 = plus(v1546, v1547); real2 v1269 = ctimesminusplus(reverse(v1255), tbl[228 + tbloffset], ctimes(v1255, tbl[229 + tbloffset])); real2 v1438 = minus(v1309, v1269); real2 v1442 = plus(v1269, v1309); real2 v1263 = ctimesminusplus(reverse(v1253), tbl[226 + tbloffset], ctimes(v1253, tbl[227 + tbloffset])); real2 v943 = ctimesminusplus(reverse(v933), tbl[162 + tbloffset], ctimes(v933, tbl[163 + tbloffset])); real2 v1624 = plus(v783, v943); real2 v1620 = minus(v943, v783); real2 v1623 = minusplus(uminus(v1619), v1620); real2 v1621 = minusplus(v1619, v1620); real2 v1700 = minus(v1625, v1624); real2 v1704 = plus(v1624, v1625); real2 v1631 = ctimesminusplus(reverse(v1621), tbl[286 + tbloffset], ctimes(v1621, tbl[287 + tbloffset])); real2 v949 = ctimesminusplus(reverse(v935), tbl[164 + tbloffset], ctimes(v935, tbl[165 + tbloffset])); real2 v1830 = minus(v949, v789); real2 v1834 = plus(v789, v949); real2 v1782 = plus(v1631, v1671); real2 v1778 = minus(v1671, v1631); real2 v1910 = minus(v1835, v1834); real2 v1914 = plus(v1834, v1835); real2 v1950 = minus(v1915, v1914); real2 v1954 = plus(v1914, v1915); real2 v1913 = minusplus(uminus(v1909), v1910); real2 v1911 = minusplus(v1909, v1910); real2 v613 = minusplus(v611, v612); real2 v615 = minusplus(uminus(v611), v612); real2 v629 = ctimesminusplus(reverse(v615), tbl[100 + tbloffset], ctimes(v615, tbl[101 + tbloffset])); real2 v1744 = plus(v1704, v1705); real2 v1740 = minus(v1705, 
v1704); real2 v1637 = ctimesminusplus(reverse(v1623), tbl[288 + tbloffset], ctimes(v1623, tbl[289 + tbloffset])); real2 v1927 = ctimesminusplus(reverse(v1913), tbl[340 + tbloffset], ctimes(v1913, tbl[341 + tbloffset])); real2 v2571 = plus(v309, v629); real2 v2565 = reverse(minus(v629, v309)); real2 v1833 = minusplus(uminus(v1829), v1830); real2 v1831 = minusplus(v1829, v1830); real2 v1921 = ctimesminusplus(reverse(v1911), tbl[338 + tbloffset], ctimes(v1911, tbl[339 + tbloffset])); real2 v1804 = minus(v1677, v1637); real2 v1808 = plus(v1637, v1677); real2 v1847 = ctimesminusplus(reverse(v1833), tbl[324 + tbloffset], ctimes(v1833, tbl[325 + tbloffset])); real2 v2014 = minus(v1887, v1847); real2 v2018 = plus(v1847, v1887); real2 v1841 = ctimesminusplus(reverse(v1831), tbl[322 + tbloffset], ctimes(v1831, tbl[323 + tbloffset])); real2 v1988 = minus(v1881, v1841); real2 v1992 = plus(v1841, v1881); real2 v1703 = minusplus(uminus(v1699), v1700); real2 v1701 = minusplus(v1699, v1700); real2 v1717 = ctimesminusplus(reverse(v1703), tbl[304 + tbloffset], ctimes(v1703, tbl[305 + tbloffset])); real2 v1711 = ctimesminusplus(reverse(v1701), tbl[302 + tbloffset], ctimes(v1701, tbl[303 + tbloffset])); real2 v2730 = plus(v2570, v2571); real2 v2726 = minus(v2571, v2570); real2 v1412 = minus(v1303, v1263); real2 v1416 = plus(v1263, v1303); real2 v63 = load(in, 61 << inShift); real2 v127 = load(in, 125 << inShift); real2 v717 = plus(v63, v127); real2 v711 = reverse(minus(v127, v63)); real2 v95 = load(in, 93 << inShift); real2 v31 = load(in, 29 << inShift); real2 v712 = minus(v95, v31); real2 v716 = plus(v31, v95); real2 v1037 = plus(v716, v717); real2 v1031 = reverse(minus(v717, v716)); real2 v79 = load(in, 77 << inShift); real2 v15 = load(in, 13 << inShift); real2 v396 = plus(v15, v79); real2 v392 = minus(v79, v15); real2 v111 = load(in, 109 << inShift); real2 v47 = load(in, 45 << inShift); real2 v397 = plus(v47, v111); real2 v391 = reverse(minus(v111, v47)); real2 v1032 = minus(v397, 
v396); real2 v1036 = plus(v396, v397); real2 v1033 = minusplus(v1031, v1032); real2 v1035 = minusplus(uminus(v1031), v1032); real2 v1049 = ctimesminusplus(reverse(v1035), tbl[184 + tbloffset], ctimes(v1035, tbl[185 + tbloffset])); real2 v1043 = ctimesminusplus(reverse(v1033), tbl[182 + tbloffset], ctimes(v1033, tbl[183 + tbloffset])); real2 v1197 = plus(v1036, v1037); real2 v1191 = reverse(minus(v1037, v1036)); real2 v23 = load(in, 21 << inShift); real2 v87 = load(in, 85 << inShift); real2 v556 = plus(v23, v87); real2 v552 = minus(v87, v23); real2 v119 = load(in, 117 << inShift); real2 v55 = load(in, 53 << inShift); real2 v557 = plus(v55, v119); real2 v551 = reverse(minus(v119, v55)); real2 v877 = plus(v556, v557); real2 v871 = reverse(minus(v557, v556)); real2 v7 = load(in, 5 << inShift); real2 v71 = load(in, 69 << inShift); real2 v232 = minus(v71, v7); real2 v236 = plus(v7, v71); real2 v103 = load(in, 101 << inShift); real2 v39 = load(in, 37 << inShift); real2 v237 = plus(v39, v103); real2 v231 = reverse(minus(v103, v39)); real2 v876 = plus(v236, v237); real2 v872 = minus(v237, v236); real2 v1192 = minus(v877, v876); real2 v1196 = plus(v876, v877); real2 v1271 = reverse(minus(v1197, v1196)); real2 v1277 = plus(v1196, v1197); real2 v875 = minusplus(uminus(v871), v872); real2 v873 = minusplus(v871, v872); real2 v883 = ctimesminusplus(reverse(v873), tbl[150 + tbloffset], ctimes(v873, tbl[151 + tbloffset])); real2 v1639 = reverse(minus(v1043, v883)); real2 v1645 = plus(v883, v1043); real2 v1195 = minusplus(uminus(v1191), v1192); real2 v1193 = minusplus(v1191, v1192); real2 v1209 = ctimesminusplus(reverse(v1195), tbl[216 + tbloffset], ctimes(v1195, tbl[217 + tbloffset])); real2 v1203 = ctimesminusplus(reverse(v1193), tbl[214 + tbloffset], ctimes(v1193, tbl[215 + tbloffset])); real2 v83 = load(in, 81 << inShift); real2 v19 = load(in, 17 << inShift); real2 v476 = plus(v19, v83); real2 v472 = minus(v83, v19); real2 v51 = load(in, 49 << inShift); real2 v115 = load(in, 113 
<< inShift); real2 v477 = plus(v51, v115); real2 v471 = reverse(minus(v115, v51)); real2 v797 = plus(v476, v477); real2 v791 = reverse(minus(v477, v476)); real2 v3 = load(in, 1 << inShift); real2 v67 = load(in, 65 << inShift); real2 v156 = plus(v3, v67); real2 v152 = minus(v67, v3); real2 v35 = load(in, 33 << inShift); real2 v99 = load(in, 97 << inShift); real2 v157 = plus(v35, v99); real2 v151 = reverse(minus(v99, v35)); real2 v792 = minus(v157, v156); real2 v796 = plus(v156, v157); real2 v793 = minusplus(v791, v792); real2 v795 = minusplus(uminus(v791), v792); real2 v803 = ctimesminusplus(reverse(v793), tbl[134 + tbloffset], ctimes(v793, tbl[135 + tbloffset])); real2 v1112 = minus(v797, v796); real2 v1116 = plus(v796, v797); real2 v107 = load(in, 105 << inShift); real2 v43 = load(in, 41 << inShift); real2 v317 = plus(v43, v107); real2 v311 = reverse(minus(v107, v43)); real2 v75 = load(in, 73 << inShift); real2 v11 = load(in, 9 << inShift); real2 v316 = plus(v11, v75); real2 v312 = minus(v75, v11); real2 v956 = plus(v316, v317); real2 v952 = minus(v317, v316); real2 v59 = load(in, 57 << inShift); real2 v123 = load(in, 121 << inShift); real2 v631 = reverse(minus(v123, v59)); real2 v637 = plus(v59, v123); real2 v27 = load(in, 25 << inShift); real2 v91 = load(in, 89 << inShift); real2 v636 = plus(v27, v91); real2 v632 = minus(v91, v27); real2 v957 = plus(v636, v637); real2 v951 = reverse(minus(v637, v636)); real2 v1111 = reverse(minus(v957, v956)); real2 v1117 = plus(v956, v957); real2 v1276 = plus(v1116, v1117); real2 v1272 = minus(v1117, v1116); real2 v1275 = minusplus(uminus(v1271), v1272); real2 v1273 = minusplus(v1271, v1272); real2 v1283 = ctimesminusplus(reverse(v1273), tbl[230 + tbloffset], ctimes(v1273, tbl[231 + tbloffset])); real2 v1352 = minus(v1277, v1276); real2 v1356 = plus(v1276, v1277); real2 v1289 = ctimesminusplus(reverse(v1275), tbl[232 + tbloffset], ctimes(v1275, tbl[233 + tbloffset])); real2 v1115 = minusplus(uminus(v1111), v1112); real2 v1113 = 
minusplus(v1111, v1112); real2 v1123 = ctimesminusplus(reverse(v1113), tbl[198 + tbloffset], ctimes(v1113, tbl[199 + tbloffset])); real2 v1129 = ctimesminusplus(reverse(v1115), tbl[200 + tbloffset], ctimes(v1115, tbl[201 + tbloffset])); real2 v1488 = plus(v1123, v1203); real2 v1484 = minus(v1203, v1123); real2 v1566 = plus(v1129, v1209); real2 v1562 = minus(v1209, v1129); real2 v85 = load(in, 83 << inShift); real2 v21 = load(in, 19 << inShift); real2 v512 = minus(v85, v21); real2 v516 = plus(v21, v85); real2 v117 = load(in, 115 << inShift); real2 v53 = load(in, 51 << inShift); real2 v517 = plus(v53, v117); real2 v511 = reverse(minus(v117, v53)); real2 v831 = reverse(minus(v517, v516)); real2 v837 = plus(v516, v517); real2 v69 = load(in, 67 << inShift); real2 v5 = load(in, 3 << inShift); real2 v192 = minus(v69, v5); real2 v196 = plus(v5, v69); real2 v37 = load(in, 35 << inShift); real2 v101 = load(in, 99 << inShift); real2 v197 = plus(v37, v101); real2 v191 = reverse(minus(v101, v37)); real2 v832 = minus(v197, v196); real2 v836 = plus(v196, v197); real2 v1152 = minus(v837, v836); real2 v1156 = plus(v836, v837); real2 v61 = load(in, 59 << inShift); real2 v125 = load(in, 123 << inShift); real2 v677 = plus(v61, v125); real2 v671 = reverse(minus(v125, v61)); real2 v29 = load(in, 27 << inShift); real2 v93 = load(in, 91 << inShift); real2 v672 = minus(v93, v29); real2 v676 = plus(v29, v93); real2 v997 = plus(v676, v677); real2 v991 = reverse(minus(v677, v676)); real2 v109 = load(in, 107 << inShift); real2 v45 = load(in, 43 << inShift); real2 v357 = plus(v45, v109); real2 v351 = reverse(minus(v109, v45)); real2 v77 = load(in, 75 << inShift); real2 v13 = load(in, 11 << inShift); real2 v352 = minus(v77, v13); real2 v356 = plus(v13, v77); real2 v992 = minus(v357, v356); real2 v996 = plus(v356, v357); real2 v1157 = plus(v996, v997); real2 v1151 = reverse(minus(v997, v996)); real2 v1155 = minusplus(uminus(v1151), v1152); real2 v1153 = minusplus(v1151, v1152); real2 v1163 = 
ctimesminusplus(reverse(v1153), tbl[206 + tbloffset], ctimes(v1153, tbl[207 + tbloffset])); real2 v1316 = plus(v1156, v1157); real2 v1312 = minus(v1157, v1156); real2 v41 = load(in, 39 << inShift); real2 v105 = load(in, 103 << inShift); real2 v277 = plus(v41, v105); real2 v271 = reverse(minus(v105, v41)); real2 v9 = load(in, 7 << inShift); real2 v73 = load(in, 71 << inShift); real2 v276 = plus(v9, v73); real2 v272 = minus(v73, v9); real2 v916 = plus(v276, v277); real2 v912 = minus(v277, v276); real2 v89 = load(in, 87 << inShift); real2 v25 = load(in, 23 << inShift); real2 v592 = minus(v89, v25); real2 v596 = plus(v25, v89); real2 v57 = load(in, 55 << inShift); real2 v121 = load(in, 119 << inShift); real2 v591 = reverse(minus(v121, v57)); real2 v597 = plus(v57, v121); real2 v911 = reverse(minus(v597, v596)); real2 v917 = plus(v596, v597); real2 v1236 = plus(v916, v917); real2 v1232 = minus(v917, v916); real2 v81 = load(in, 79 << inShift); real2 v17 = load(in, 15 << inShift); real2 v432 = minus(v81, v17); real2 v436 = plus(v17, v81); real2 v113 = load(in, 111 << inShift); real2 v49 = load(in, 47 << inShift); real2 v437 = plus(v49, v113); real2 v431 = reverse(minus(v113, v49)); real2 v1072 = minus(v437, v436); real2 v1076 = plus(v436, v437); real2 v65 = load(in, 63 << inShift); real2 v129 = load(in, 127 << inShift); real2 v757 = plus(v65, v129); real2 v751 = reverse(minus(v129, v65)); real2 v97 = load(in, 95 << inShift); real2 v33 = load(in, 31 << inShift); real2 v752 = minus(v97, v33); real2 v756 = plus(v33, v97); real2 v1077 = plus(v756, v757); real2 v1071 = reverse(minus(v757, v756)); real2 v1231 = reverse(minus(v1077, v1076)); real2 v1237 = plus(v1076, v1077); real2 v1317 = plus(v1236, v1237); real2 v1311 = reverse(minus(v1237, v1236)); real2 v1351 = reverse(minus(v1317, v1316)); real2 v1357 = plus(v1316, v1317); real2 v1371 = reverse(minus(v1357, v1356)); real2 v1377 = plus(v1356, v1357); store(out, 0 << outShift, plus(v1376, v1377)); real2 v1390 = minus(v1376, 
v1377); store(out, 64 << outShift, ctimesminusplus(v1390, tbl[0 + tbloffset], ctimes(reverse(v1390), tbl[1 + tbloffset]))); real2 v1353 = minusplus(v1351, v1352); real2 v1355 = minusplus(uminus(v1351), v1352); real2 v1369 = ctimesminusplus(reverse(v1355), tbl[248 + tbloffset], ctimes(v1355, tbl[249 + tbloffset])); store(out, 48 << outShift, plus(v1349, v1369)); real2 v1404 = minus(v1349, v1369); store(out, 112 << outShift, ctimesminusplus(v1404, tbl[0 + tbloffset], ctimes(reverse(v1404), tbl[1 + tbloffset]))); real2 v1363 = ctimesminusplus(reverse(v1353), tbl[246 + tbloffset], ctimes(v1353, tbl[247 + tbloffset])); store(out, 16 << outShift, plus(v1343, v1363)); real2 v1398 = minus(v1343, v1363); store(out, 80 << outShift, ctimesminusplus(v1398, tbl[0 + tbloffset], ctimes(reverse(v1398), tbl[1 + tbloffset]))); real2 v1373 = minusplus(v1371, v1372); real2 v1375 = minusplus(uminus(v1371), v1372); store(out, 96 << outShift, ctimesminusplus(reverse(v1375), tbl[252 + tbloffset], ctimes(v1375, tbl[253 + tbloffset]))); store(out, 32 << outShift, ctimesminusplus(reverse(v1373), tbl[250 + tbloffset], ctimes(v1373, tbl[251 + tbloffset]))); real2 v1313 = minusplus(v1311, v1312); real2 v1315 = minusplus(uminus(v1311), v1312); real2 v1323 = ctimesminusplus(reverse(v1313), tbl[238 + tbloffset], ctimes(v1313, tbl[239 + tbloffset])); real2 v1417 = plus(v1283, v1323); real2 v1411 = reverse(minus(v1323, v1283)); store(out, 8 << outShift, plus(v1416, v1417)); real2 v1430 = minus(v1416, v1417); store(out, 72 << outShift, ctimesminusplus(v1430, tbl[0 + tbloffset], ctimes(reverse(v1430), tbl[1 + tbloffset]))); real2 v1413 = minusplus(v1411, v1412); real2 v1415 = minusplus(uminus(v1411), v1412); store(out, 104 << outShift, ctimesminusplus(reverse(v1415), tbl[256 + tbloffset], ctimes(v1415, tbl[257 + tbloffset]))); store(out, 40 << outShift, ctimesminusplus(reverse(v1413), tbl[254 + tbloffset], ctimes(v1413, tbl[255 + tbloffset]))); real2 v1329 = ctimesminusplus(reverse(v1315), tbl[240 + 
tbloffset], ctimes(v1315, tbl[241 + tbloffset])); real2 v1443 = plus(v1289, v1329); real2 v1437 = reverse(minus(v1329, v1289)); store(out, 24 << outShift, plus(v1442, v1443)); real2 v1456 = minus(v1442, v1443); store(out, 88 << outShift, ctimesminusplus(v1456, tbl[0 + tbloffset], ctimes(reverse(v1456), tbl[1 + tbloffset]))); real2 v1441 = minusplus(uminus(v1437), v1438); real2 v1439 = minusplus(v1437, v1438); store(out, 120 << outShift, ctimesminusplus(reverse(v1441), tbl[260 + tbloffset], ctimes(v1441, tbl[261 + tbloffset]))); store(out, 56 << outShift, ctimesminusplus(reverse(v1439), tbl[258 + tbloffset], ctimes(v1439, tbl[259 + tbloffset]))); real2 v1235 = minusplus(uminus(v1231), v1232); real2 v1233 = minusplus(v1231, v1232); real2 v1243 = ctimesminusplus(reverse(v1233), tbl[222 + tbloffset], ctimes(v1233, tbl[223 + tbloffset])); real2 v1489 = plus(v1163, v1243); real2 v1483 = reverse(minus(v1243, v1163)); real2 v1509 = plus(v1488, v1489); real2 v1503 = reverse(minus(v1489, v1488)); store(out, 4 << outShift, plus(v1508, v1509)); real2 v1522 = minus(v1508, v1509); store(out, 68 << outShift, ctimesminusplus(v1522, tbl[0 + tbloffset], ctimes(reverse(v1522), tbl[1 + tbloffset]))); real2 v1507 = minusplus(uminus(v1503), v1504); real2 v1505 = minusplus(v1503, v1504); store(out, 36 << outShift, ctimesminusplus(reverse(v1505), tbl[270 + tbloffset], ctimes(v1505, tbl[271 + tbloffset]))); store(out, 100 << outShift, ctimesminusplus(reverse(v1507), tbl[272 + tbloffset], ctimes(v1507, tbl[273 + tbloffset]))); real2 v1485 = minusplus(v1483, v1484); real2 v1487 = minusplus(uminus(v1483), v1484); real2 v1501 = ctimesminusplus(reverse(v1487), tbl[268 + tbloffset], ctimes(v1487, tbl[269 + tbloffset])); store(out, 52 << outShift, plus(v1481, v1501)); real2 v1534 = minus(v1481, v1501); store(out, 116 << outShift, ctimesminusplus(v1534, tbl[0 + tbloffset], ctimes(reverse(v1534), tbl[1 + tbloffset]))); real2 v1495 = ctimesminusplus(reverse(v1485), tbl[266 + tbloffset], 
ctimes(v1485, tbl[267 + tbloffset])); store(out, 20 << outShift, plus(v1475, v1495)); real2 v1528 = minus(v1475, v1495); store(out, 84 << outShift, ctimesminusplus(v1528, tbl[0 + tbloffset], ctimes(reverse(v1528), tbl[1 + tbloffset]))); real2 v1249 = ctimesminusplus(reverse(v1235), tbl[224 + tbloffset], ctimes(v1235, tbl[225 + tbloffset])); real2 v1169 = ctimesminusplus(reverse(v1155), tbl[208 + tbloffset], ctimes(v1155, tbl[209 + tbloffset])); real2 v1567 = plus(v1169, v1249); real2 v1561 = reverse(minus(v1249, v1169)); real2 v1581 = reverse(minus(v1567, v1566)); real2 v1587 = plus(v1566, v1567); store(out, 12 << outShift, plus(v1586, v1587)); real2 v1600 = minus(v1586, v1587); store(out, 76 << outShift, ctimesminusplus(v1600, tbl[0 + tbloffset], ctimes(reverse(v1600), tbl[1 + tbloffset]))); real2 v1583 = minusplus(v1581, v1582); store(out, 44 << outShift, ctimesminusplus(reverse(v1583), tbl[282 + tbloffset], ctimes(v1583, tbl[283 + tbloffset]))); real2 v1585 = minusplus(uminus(v1581), v1582); store(out, 108 << outShift, ctimesminusplus(reverse(v1585), tbl[284 + tbloffset], ctimes(v1585, tbl[285 + tbloffset]))); real2 v1565 = minusplus(uminus(v1561), v1562); real2 v1563 = minusplus(v1561, v1562); real2 v1579 = ctimesminusplus(reverse(v1565), tbl[280 + tbloffset], ctimes(v1565, tbl[281 + tbloffset])); store(out, 60 << outShift, plus(v1559, v1579)); real2 v1612 = minus(v1559, v1579); store(out, 124 << outShift, ctimesminusplus(v1612, tbl[0 + tbloffset], ctimes(reverse(v1612), tbl[1 + tbloffset]))); real2 v1573 = ctimesminusplus(reverse(v1563), tbl[278 + tbloffset], ctimes(v1563, tbl[279 + tbloffset])); store(out, 28 << outShift, plus(v1553, v1573)); real2 v1606 = minus(v1553, v1573); store(out, 92 << outShift, ctimesminusplus(v1606, tbl[0 + tbloffset], ctimes(reverse(v1606), tbl[1 + tbloffset]))); real2 v833 = minusplus(v831, v832); real2 v835 = minusplus(uminus(v831), v832); real2 v955 = minusplus(uminus(v951), v952); real2 v953 = minusplus(v951, v952); real2 v963 
= ctimesminusplus(reverse(v953), tbl[166 + tbloffset], ctimes(v953, tbl[167 + tbloffset])); real2 v995 = minusplus(uminus(v991), v992); real2 v993 = minusplus(v991, v992); real2 v1003 = ctimesminusplus(reverse(v993), tbl[174 + tbloffset], ctimes(v993, tbl[175 + tbloffset])); real2 v843 = ctimesminusplus(reverse(v833), tbl[142 + tbloffset], ctimes(v833, tbl[143 + tbloffset])); real2 v1640 = minus(v963, v803); real2 v1644 = plus(v803, v963); real2 v1680 = minus(v1003, v843); real2 v1684 = plus(v843, v1003); real2 v1641 = minusplus(v1639, v1640); real2 v1643 = minusplus(uminus(v1639), v1640); real2 v1657 = ctimesminusplus(reverse(v1643), tbl[292 + tbloffset], ctimes(v1643, tbl[293 + tbloffset])); real2 v913 = minusplus(v911, v912); real2 v915 = minusplus(uminus(v911), v912); real2 v1073 = minusplus(v1071, v1072); real2 v1075 = minusplus(uminus(v1071), v1072); real2 v923 = ctimesminusplus(reverse(v913), tbl[158 + tbloffset], ctimes(v913, tbl[159 + tbloffset])); real2 v1083 = ctimesminusplus(reverse(v1073), tbl[190 + tbloffset], ctimes(v1073, tbl[191 + tbloffset])); real2 v1685 = plus(v923, v1083); real2 v1679 = reverse(minus(v1083, v923)); real2 v1681 = minusplus(v1679, v1680); real2 v1683 = minusplus(uminus(v1679), v1680); real2 v1697 = ctimesminusplus(reverse(v1683), tbl[300 + tbloffset], ctimes(v1683, tbl[301 + tbloffset])); real2 v1809 = plus(v1657, v1697); real2 v1803 = reverse(minus(v1697, v1657)); store(out, 26 << outShift, plus(v1808, v1809)); real2 v1822 = minus(v1808, v1809); store(out, 90 << outShift, ctimesminusplus(v1822, tbl[0 + tbloffset], ctimes(reverse(v1822), tbl[1 + tbloffset]))); real2 v1807 = minusplus(uminus(v1803), v1804); real2 v1805 = minusplus(v1803, v1804); store(out, 58 << outShift, ctimesminusplus(reverse(v1805), tbl[318 + tbloffset], ctimes(v1805, tbl[319 + tbloffset]))); store(out, 122 << outShift, ctimesminusplus(reverse(v1807), tbl[320 + tbloffset], ctimes(v1807, tbl[321 + tbloffset]))); real2 v1651 = ctimesminusplus(reverse(v1641), 
tbl[290 + tbloffset], ctimes(v1641, tbl[291 + tbloffset])); real2 v1691 = ctimesminusplus(reverse(v1681), tbl[298 + tbloffset], ctimes(v1681, tbl[299 + tbloffset])); real2 v1783 = plus(v1651, v1691); real2 v1777 = reverse(minus(v1691, v1651)); real2 v1779 = minusplus(v1777, v1778); real2 v1781 = minusplus(uminus(v1777), v1778); store(out, 106 << outShift, ctimesminusplus(reverse(v1781), tbl[316 + tbloffset], ctimes(v1781, tbl[317 + tbloffset]))); store(out, 42 << outShift, ctimesminusplus(reverse(v1779), tbl[314 + tbloffset], ctimes(v1779, tbl[315 + tbloffset]))); store(out, 10 << outShift, plus(v1782, v1783)); real2 v1796 = minus(v1782, v1783); store(out, 74 << outShift, ctimesminusplus(v1796, tbl[0 + tbloffset], ctimes(reverse(v1796), tbl[1 + tbloffset]))); real2 v1720 = minus(v1645, v1644); real2 v1724 = plus(v1644, v1645); real2 v1719 = reverse(minus(v1685, v1684)); real2 v1725 = plus(v1684, v1685); real2 v1745 = plus(v1724, v1725); real2 v1739 = reverse(minus(v1725, v1724)); store(out, 2 << outShift, plus(v1744, v1745)); real2 v1758 = minus(v1744, v1745); store(out, 66 << outShift, ctimesminusplus(v1758, tbl[0 + tbloffset], ctimes(reverse(v1758), tbl[1 + tbloffset]))); real2 v1741 = minusplus(v1739, v1740); real2 v1743 = minusplus(uminus(v1739), v1740); store(out, 98 << outShift, ctimesminusplus(reverse(v1743), tbl[312 + tbloffset], ctimes(v1743, tbl[313 + tbloffset]))); store(out, 34 << outShift, ctimesminusplus(reverse(v1741), tbl[310 + tbloffset], ctimes(v1741, tbl[311 + tbloffset]))); real2 v1723 = minusplus(uminus(v1719), v1720); real2 v1721 = minusplus(v1719, v1720); real2 v1737 = ctimesminusplus(reverse(v1723), tbl[308 + tbloffset], ctimes(v1723, tbl[309 + tbloffset])); store(out, 50 << outShift, plus(v1717, v1737)); real2 v1770 = minus(v1717, v1737); store(out, 114 << outShift, ctimesminusplus(v1770, tbl[0 + tbloffset], ctimes(reverse(v1770), tbl[1 + tbloffset]))); real2 v1731 = ctimesminusplus(reverse(v1721), tbl[306 + tbloffset], ctimes(v1721, 
tbl[307 + tbloffset])); store(out, 18 << outShift, plus(v1711, v1731)); real2 v1764 = minus(v1711, v1731); store(out, 82 << outShift, ctimesminusplus(v1764, tbl[0 + tbloffset], ctimes(reverse(v1764), tbl[1 + tbloffset]))); real2 v809 = ctimesminusplus(reverse(v795), tbl[136 + tbloffset], ctimes(v795, tbl[137 + tbloffset])); real2 v969 = ctimesminusplus(reverse(v955), tbl[168 + tbloffset], ctimes(v955, tbl[169 + tbloffset])); real2 v1850 = minus(v969, v809); real2 v1854 = plus(v809, v969); real2 v849 = ctimesminusplus(reverse(v835), tbl[144 + tbloffset], ctimes(v835, tbl[145 + tbloffset])); real2 v929 = ctimesminusplus(reverse(v915), tbl[160 + tbloffset], ctimes(v915, tbl[161 + tbloffset])); real2 v889 = ctimesminusplus(reverse(v875), tbl[152 + tbloffset], ctimes(v875, tbl[153 + tbloffset])); real2 v1089 = ctimesminusplus(reverse(v1075), tbl[192 + tbloffset], ctimes(v1075, tbl[193 + tbloffset])); real2 v1009 = ctimesminusplus(reverse(v995), tbl[176 + tbloffset], ctimes(v995, tbl[177 + tbloffset])); real2 v1890 = minus(v1009, v849); real2 v1894 = plus(v849, v1009); real2 v1849 = reverse(minus(v1049, v889)); real2 v1855 = plus(v889, v1049); real2 v1930 = minus(v1855, v1854); real2 v1934 = plus(v1854, v1855); real2 v1895 = plus(v929, v1089); real2 v1889 = reverse(minus(v1089, v929)); real2 v1929 = reverse(minus(v1895, v1894)); real2 v1935 = plus(v1894, v1895); real2 v1955 = plus(v1934, v1935); real2 v1949 = reverse(minus(v1935, v1934)); store(out, 6 << outShift, plus(v1954, v1955)); real2 v1968 = minus(v1954, v1955); store(out, 70 << outShift, ctimesminusplus(v1968, tbl[0 + tbloffset], ctimes(reverse(v1968), tbl[1 + tbloffset]))); real2 v1951 = minusplus(v1949, v1950); store(out, 38 << outShift, ctimesminusplus(reverse(v1951), tbl[346 + tbloffset], ctimes(v1951, tbl[347 + tbloffset]))); real2 v1953 = minusplus(uminus(v1949), v1950); store(out, 102 << outShift, ctimesminusplus(reverse(v1953), tbl[348 + tbloffset], ctimes(v1953, tbl[349 + tbloffset]))); real2 v1931 = 
minusplus(v1929, v1930); real2 v1933 = minusplus(uminus(v1929), v1930); real2 v1947 = ctimesminusplus(reverse(v1933), tbl[344 + tbloffset], ctimes(v1933, tbl[345 + tbloffset])); store(out, 54 << outShift, plus(v1927, v1947)); real2 v1980 = minus(v1927, v1947); store(out, 118 << outShift, ctimesminusplus(v1980, tbl[0 + tbloffset], ctimes(reverse(v1980), tbl[1 + tbloffset]))); real2 v1941 = ctimesminusplus(reverse(v1931), tbl[342 + tbloffset], ctimes(v1931, tbl[343 + tbloffset])); store(out, 22 << outShift, plus(v1921, v1941)); real2 v1974 = minus(v1921, v1941); store(out, 86 << outShift, ctimesminusplus(v1974, tbl[0 + tbloffset], ctimes(reverse(v1974), tbl[1 + tbloffset]))); real2 v1851 = minusplus(v1849, v1850); real2 v1853 = minusplus(uminus(v1849), v1850); real2 v1867 = ctimesminusplus(reverse(v1853), tbl[328 + tbloffset], ctimes(v1853, tbl[329 + tbloffset])); real2 v1891 = minusplus(v1889, v1890); real2 v1893 = minusplus(uminus(v1889), v1890); real2 v1907 = ctimesminusplus(reverse(v1893), tbl[336 + tbloffset], ctimes(v1893, tbl[337 + tbloffset])); real2 v2019 = plus(v1867, v1907); real2 v2013 = reverse(minus(v1907, v1867)); store(out, 30 << outShift, plus(v2018, v2019)); real2 v2032 = minus(v2018, v2019); store(out, 94 << outShift, ctimesminusplus(v2032, tbl[0 + tbloffset], ctimes(reverse(v2032), tbl[1 + tbloffset]))); real2 v2017 = minusplus(uminus(v2013), v2014); store(out, 126 << outShift, ctimesminusplus(reverse(v2017), tbl[356 + tbloffset], ctimes(v2017, tbl[357 + tbloffset]))); real2 v2015 = minusplus(v2013, v2014); store(out, 62 << outShift, ctimesminusplus(reverse(v2015), tbl[354 + tbloffset], ctimes(v2015, tbl[355 + tbloffset]))); real2 v1861 = ctimesminusplus(reverse(v1851), tbl[326 + tbloffset], ctimes(v1851, tbl[327 + tbloffset])); real2 v1901 = ctimesminusplus(reverse(v1891), tbl[334 + tbloffset], ctimes(v1891, tbl[335 + tbloffset])); real2 v1993 = plus(v1861, v1901); real2 v1987 = reverse(minus(v1901, v1861)); store(out, 14 << outShift, plus(v1992, 
v1993)); real2 v2006 = minus(v1992, v1993); store(out, 78 << outShift, ctimesminusplus(v2006, tbl[0 + tbloffset], ctimes(reverse(v2006), tbl[1 + tbloffset]))); real2 v1991 = minusplus(uminus(v1987), v1988); store(out, 110 << outShift, ctimesminusplus(reverse(v1991), tbl[352 + tbloffset], ctimes(v1991, tbl[353 + tbloffset]))); real2 v1989 = minusplus(v1987, v1988); store(out, 46 << outShift, ctimesminusplus(reverse(v1989), tbl[350 + tbloffset], ctimes(v1989, tbl[351 + tbloffset]))); real2 v593 = minusplus(v591, v592); real2 v595 = minusplus(uminus(v591), v592); real2 v473 = minusplus(v471, v472); real2 v475 = minusplus(uminus(v471), v472); real2 v555 = minusplus(uminus(v551), v552); real2 v553 = minusplus(v551, v552); real2 v609 = ctimesminusplus(reverse(v595), tbl[96 + tbloffset], ctimes(v595, tbl[97 + tbloffset])); real2 v195 = minusplus(uminus(v191), v192); real2 v193 = minusplus(v191, v192); real2 v275 = minusplus(uminus(v271), v272); real2 v273 = minusplus(v271, v272); real2 v673 = minusplus(v671, v672); real2 v675 = minusplus(uminus(v671), v672); real2 v689 = ctimesminusplus(reverse(v675), tbl[112 + tbloffset], ctimes(v675, tbl[113 + tbloffset])); real2 v209 = ctimesminusplus(reverse(v195), tbl[16 + tbloffset], ctimes(v195, tbl[17 + tbloffset])); real2 v289 = ctimesminusplus(reverse(v275), tbl[32 + tbloffset], ctimes(v275, tbl[33 + tbloffset])); real2 v755 = minusplus(uminus(v751), v752); real2 v753 = minusplus(v751, v752); real2 v435 = minusplus(uminus(v431), v432); real2 v433 = minusplus(v431, v432); real2 v513 = minusplus(v511, v512); real2 v515 = minusplus(uminus(v511), v512); real2 v529 = ctimesminusplus(reverse(v515), tbl[80 + tbloffset], ctimes(v515, tbl[81 + tbloffset])); real2 v353 = minusplus(v351, v352); real2 v355 = minusplus(uminus(v351), v352); real2 v369 = ctimesminusplus(reverse(v355), tbl[48 + tbloffset], ctimes(v355, tbl[49 + tbloffset])); real2 v2631 = plus(v369, v689); real2 v2625 = reverse(minus(v689, v369)); real2 v449 = 
ctimesminusplus(reverse(v435), tbl[64 + tbloffset], ctimes(v435, tbl[65 + tbloffset])); real2 v2710 = plus(v289, v609); real2 v2706 = minus(v609, v289); real2 v2630 = plus(v209, v529); real2 v2626 = minus(v529, v209); real2 v2790 = plus(v2630, v2631); real2 v2786 = minus(v2631, v2630); real2 v713 = minusplus(v711, v712); real2 v715 = minusplus(uminus(v711), v712); real2 v769 = ctimesminusplus(reverse(v755), tbl[128 + tbloffset], ctimes(v755, tbl[129 + tbloffset])); real2 v2705 = reverse(minus(v769, v449)); real2 v2711 = plus(v449, v769); real2 v313 = minusplus(v311, v312); real2 v315 = minusplus(uminus(v311), v312); real2 v393 = minusplus(v391, v392); real2 v395 = minusplus(uminus(v391), v392); real2 v409 = ctimesminusplus(reverse(v395), tbl[56 + tbloffset], ctimes(v395, tbl[57 + tbloffset])); real2 v729 = ctimesminusplus(reverse(v715), tbl[120 + tbloffset], ctimes(v715, tbl[121 + tbloffset])); real2 v329 = ctimesminusplus(reverse(v315), tbl[40 + tbloffset], ctimes(v315, tbl[41 + tbloffset])); real2 v489 = ctimesminusplus(reverse(v475), tbl[72 + tbloffset], ctimes(v475, tbl[73 + tbloffset])); real2 v153 = minusplus(v151, v152); real2 v155 = minusplus(uminus(v151), v152); real2 v169 = ctimesminusplus(reverse(v155), tbl[8 + tbloffset], ctimes(v155, tbl[9 + tbloffset])); real2 v2586 = minus(v489, v169); real2 v2590 = plus(v169, v489); real2 v233 = minusplus(v231, v232); real2 v235 = minusplus(uminus(v231), v232); real2 v633 = minusplus(v631, v632); real2 v635 = minusplus(uminus(v631), v632); real2 v649 = ctimesminusplus(reverse(v635), tbl[104 + tbloffset], ctimes(v635, tbl[105 + tbloffset])); real2 v249 = ctimesminusplus(reverse(v235), tbl[24 + tbloffset], ctimes(v235, tbl[25 + tbloffset])); real2 v569 = ctimesminusplus(reverse(v555), tbl[88 + tbloffset], ctimes(v555, tbl[89 + tbloffset])); real2 v2670 = plus(v249, v569); real2 v2666 = minus(v569, v249); real2 v2785 = reverse(minus(v2711, v2710)); real2 v2791 = plus(v2710, v2711); real2 v2825 = reverse(minus(v2791, 
v2790)); real2 v2831 = plus(v2790, v2791); real2 v2671 = plus(v409, v729); real2 v2665 = reverse(minus(v729, v409)); real2 v2745 = reverse(minus(v2671, v2670)); real2 v2751 = plus(v2670, v2671); real2 v2806 = minus(v2731, v2730); real2 v2810 = plus(v2730, v2731); real2 v2846 = minus(v2811, v2810); real2 v2850 = plus(v2810, v2811); real2 v2591 = plus(v329, v649); real2 v2585 = reverse(minus(v649, v329)); real2 v2750 = plus(v2590, v2591); real2 v2746 = minus(v2591, v2590); real2 v2830 = plus(v2750, v2751); real2 v2826 = minus(v2751, v2750); real2 v2845 = reverse(minus(v2831, v2830)); real2 v2851 = plus(v2830, v2831); store(out, 3 << outShift, plus(v2850, v2851)); real2 v2864 = minus(v2850, v2851); store(out, 67 << outShift, ctimesminusplus(v2864, tbl[0 + tbloffset], ctimes(reverse(v2864), tbl[1 + tbloffset]))); real2 v2849 = minusplus(uminus(v2845), v2846); real2 v2847 = minusplus(v2845, v2846); store(out, 35 << outShift, ctimesminusplus(reverse(v2847), tbl[506 + tbloffset], ctimes(v2847, tbl[507 + tbloffset]))); store(out, 99 << outShift, ctimesminusplus(reverse(v2849), tbl[508 + tbloffset], ctimes(v2849, tbl[509 + tbloffset]))); real2 v2827 = minusplus(v2825, v2826); real2 v2829 = minusplus(uminus(v2825), v2826); real2 v2837 = ctimesminusplus(reverse(v2827), tbl[502 + tbloffset], ctimes(v2827, tbl[503 + tbloffset])); real2 v2809 = minusplus(uminus(v2805), v2806); real2 v2807 = minusplus(v2805, v2806); real2 v2817 = ctimesminusplus(reverse(v2807), tbl[498 + tbloffset], ctimes(v2807, tbl[499 + tbloffset])); store(out, 19 << outShift, plus(v2817, v2837)); real2 v2870 = minus(v2817, v2837); store(out, 83 << outShift, ctimesminusplus(v2870, tbl[0 + tbloffset], ctimes(reverse(v2870), tbl[1 + tbloffset]))); real2 v2823 = ctimesminusplus(reverse(v2809), tbl[500 + tbloffset], ctimes(v2809, tbl[501 + tbloffset])); real2 v2843 = ctimesminusplus(reverse(v2829), tbl[504 + tbloffset], ctimes(v2829, tbl[505 + tbloffset])); store(out, 51 << outShift, plus(v2823, v2843)); real2 
v2876 = minus(v2823, v2843); store(out, 115 << outShift, ctimesminusplus(v2876, tbl[0 + tbloffset], ctimes(reverse(v2876), tbl[1 + tbloffset]))); real2 v2787 = minusplus(v2785, v2786); real2 v2789 = minusplus(uminus(v2785), v2786); real2 v2803 = ctimesminusplus(reverse(v2789), tbl[496 + tbloffset], ctimes(v2789, tbl[497 + tbloffset])); real2 v2727 = minusplus(v2725, v2726); real2 v2729 = minusplus(uminus(v2725), v2726); real2 v2743 = ctimesminusplus(reverse(v2729), tbl[484 + tbloffset], ctimes(v2729, tbl[485 + tbloffset])); real2 v2914 = plus(v2743, v2783); real2 v2910 = minus(v2783, v2743); real2 v2749 = minusplus(uminus(v2745), v2746); real2 v2747 = minusplus(v2745, v2746); real2 v2763 = ctimesminusplus(reverse(v2749), tbl[488 + tbloffset], ctimes(v2749, tbl[489 + tbloffset])); real2 v2909 = reverse(minus(v2803, v2763)); real2 v2915 = plus(v2763, v2803); store(out, 27 << outShift, plus(v2914, v2915)); real2 v2928 = minus(v2914, v2915); store(out, 91 << outShift, ctimesminusplus(v2928, tbl[0 + tbloffset], ctimes(reverse(v2928), tbl[1 + tbloffset]))); real2 v2913 = minusplus(uminus(v2909), v2910); store(out, 123 << outShift, ctimesminusplus(reverse(v2913), tbl[516 + tbloffset], ctimes(v2913, tbl[517 + tbloffset]))); real2 v2911 = minusplus(v2909, v2910); store(out, 59 << outShift, ctimesminusplus(reverse(v2911), tbl[514 + tbloffset], ctimes(v2911, tbl[515 + tbloffset]))); real2 v2737 = ctimesminusplus(reverse(v2727), tbl[482 + tbloffset], ctimes(v2727, tbl[483 + tbloffset])); real2 v2888 = plus(v2737, v2777); real2 v2884 = minus(v2777, v2737); real2 v2797 = ctimesminusplus(reverse(v2787), tbl[494 + tbloffset], ctimes(v2787, tbl[495 + tbloffset])); real2 v2757 = ctimesminusplus(reverse(v2747), tbl[486 + tbloffset], ctimes(v2747, tbl[487 + tbloffset])); real2 v2889 = plus(v2757, v2797); real2 v2883 = reverse(minus(v2797, v2757)); store(out, 11 << outShift, plus(v2888, v2889)); real2 v2902 = minus(v2888, v2889); store(out, 75 << outShift, ctimesminusplus(v2902, tbl[0 
+ tbloffset], ctimes(reverse(v2902), tbl[1 + tbloffset]))); real2 v2887 = minusplus(uminus(v2883), v2884); store(out, 107 << outShift, ctimesminusplus(reverse(v2887), tbl[512 + tbloffset], ctimes(v2887, tbl[513 + tbloffset]))); real2 v2885 = minusplus(v2883, v2884); store(out, 43 << outShift, ctimesminusplus(reverse(v2885), tbl[510 + tbloffset], ctimes(v2885, tbl[511 + tbloffset]))); real2 v2669 = minusplus(uminus(v2665), v2666); real2 v2667 = minusplus(v2665, v2666); real2 v2707 = minusplus(v2705, v2706); real2 v2709 = minusplus(uminus(v2705), v2706); real2 v2717 = ctimesminusplus(reverse(v2707), tbl[478 + tbloffset], ctimes(v2707, tbl[479 + tbloffset])); real2 v2627 = minusplus(v2625, v2626); real2 v2629 = minusplus(uminus(v2625), v2626); real2 v2637 = ctimesminusplus(reverse(v2627), tbl[462 + tbloffset], ctimes(v2627, tbl[463 + tbloffset])); real2 v2961 = plus(v2637, v2717); real2 v2955 = reverse(minus(v2717, v2637)); real2 v2649 = minusplus(uminus(v2645), v2646); real2 v2647 = minusplus(v2645, v2646); real2 v2569 = minusplus(uminus(v2565), v2566); real2 v2567 = minusplus(v2565, v2566); real2 v2577 = ctimesminusplus(reverse(v2567), tbl[450 + tbloffset], ctimes(v2567, tbl[451 + tbloffset])); real2 v2657 = ctimesminusplus(reverse(v2647), tbl[466 + tbloffset], ctimes(v2647, tbl[467 + tbloffset])); real2 v2936 = minus(v2657, v2577); real2 v2940 = plus(v2577, v2657); real2 v2976 = minus(v2941, v2940); real2 v2980 = plus(v2940, v2941); real2 v2677 = ctimesminusplus(reverse(v2667), tbl[470 + tbloffset], ctimes(v2667, tbl[471 + tbloffset])); real2 v2587 = minusplus(v2585, v2586); real2 v2589 = minusplus(uminus(v2585), v2586); real2 v2597 = ctimesminusplus(reverse(v2587), tbl[454 + tbloffset], ctimes(v2587, tbl[455 + tbloffset])); real2 v2956 = minus(v2677, v2597); real2 v2960 = plus(v2597, v2677); real2 v2975 = reverse(minus(v2961, v2960)); real2 v2981 = plus(v2960, v2961); store(out, 7 << outShift, plus(v2980, v2981)); real2 v2994 = minus(v2980, v2981); store(out, 71 
<< outShift, ctimesminusplus(v2994, tbl[0 + tbloffset], ctimes(reverse(v2994), tbl[1 + tbloffset]))); real2 v2979 = minusplus(uminus(v2975), v2976); store(out, 103 << outShift, ctimesminusplus(reverse(v2979), tbl[528 + tbloffset], ctimes(v2979, tbl[529 + tbloffset]))); real2 v2977 = minusplus(v2975, v2976); store(out, 39 << outShift, ctimesminusplus(reverse(v2977), tbl[526 + tbloffset], ctimes(v2977, tbl[527 + tbloffset]))); real2 v2939 = minusplus(uminus(v2935), v2936); real2 v2937 = minusplus(v2935, v2936); real2 v2953 = ctimesminusplus(reverse(v2939), tbl[520 + tbloffset], ctimes(v2939, tbl[521 + tbloffset])); real2 v2957 = minusplus(v2955, v2956); real2 v2959 = minusplus(uminus(v2955), v2956); real2 v2973 = ctimesminusplus(reverse(v2959), tbl[524 + tbloffset], ctimes(v2959, tbl[525 + tbloffset])); store(out, 55 << outShift, plus(v2953, v2973)); real2 v3006 = minus(v2953, v2973); store(out, 119 << outShift, ctimesminusplus(v3006, tbl[0 + tbloffset], ctimes(reverse(v3006), tbl[1 + tbloffset]))); real2 v2947 = ctimesminusplus(reverse(v2937), tbl[518 + tbloffset], ctimes(v2937, tbl[519 + tbloffset])); real2 v2967 = ctimesminusplus(reverse(v2957), tbl[522 + tbloffset], ctimes(v2957, tbl[523 + tbloffset])); store(out, 23 << outShift, plus(v2947, v2967)); real2 v3000 = minus(v2947, v2967); store(out, 87 << outShift, ctimesminusplus(v3000, tbl[0 + tbloffset], ctimes(reverse(v3000), tbl[1 + tbloffset]))); real2 v2663 = ctimesminusplus(reverse(v2649), tbl[468 + tbloffset], ctimes(v2649, tbl[469 + tbloffset])); real2 v2583 = ctimesminusplus(reverse(v2569), tbl[452 + tbloffset], ctimes(v2569, tbl[453 + tbloffset])); real2 v3014 = minus(v2663, v2583); real2 v3018 = plus(v2583, v2663); real2 v3015 = minusplus(v3013, v3014); real2 v3017 = minusplus(uminus(v3013), v3014); real2 v2643 = ctimesminusplus(reverse(v2629), tbl[464 + tbloffset], ctimes(v2629, tbl[465 + tbloffset])); real2 v2723 = ctimesminusplus(reverse(v2709), tbl[480 + tbloffset], ctimes(v2709, tbl[481 + 
tbloffset])); real2 v3039 = plus(v2643, v2723); real2 v3033 = reverse(minus(v2723, v2643)); real2 v2683 = ctimesminusplus(reverse(v2669), tbl[472 + tbloffset], ctimes(v2669, tbl[473 + tbloffset])); real2 v3031 = ctimesminusplus(reverse(v3017), tbl[532 + tbloffset], ctimes(v3017, tbl[533 + tbloffset])); real2 v2603 = ctimesminusplus(reverse(v2589), tbl[456 + tbloffset], ctimes(v2589, tbl[457 + tbloffset])); real2 v3034 = minus(v2683, v2603); real2 v3038 = plus(v2603, v2683); real2 v3037 = minusplus(uminus(v3033), v3034); real2 v3035 = minusplus(v3033, v3034); real2 v3051 = ctimesminusplus(reverse(v3037), tbl[536 + tbloffset], ctimes(v3037, tbl[537 + tbloffset])); store(out, 63 << outShift, plus(v3031, v3051)); real2 v3084 = minus(v3031, v3051); store(out, 127 << outShift, ctimesminusplus(v3084, tbl[0 + tbloffset], ctimes(reverse(v3084), tbl[1 + tbloffset]))); real2 v3025 = ctimesminusplus(reverse(v3015), tbl[530 + tbloffset], ctimes(v3015, tbl[531 + tbloffset])); real2 v3045 = ctimesminusplus(reverse(v3035), tbl[534 + tbloffset], ctimes(v3035, tbl[535 + tbloffset])); store(out, 31 << outShift, plus(v3025, v3045)); real2 v3078 = minus(v3025, v3045); store(out, 95 << outShift, ctimesminusplus(v3078, tbl[0 + tbloffset], ctimes(reverse(v3078), tbl[1 + tbloffset]))); real2 v3058 = plus(v3018, v3019); real2 v3054 = minus(v3019, v3018); real2 v3053 = reverse(minus(v3039, v3038)); real2 v3059 = plus(v3038, v3039); real2 v3055 = minusplus(v3053, v3054); store(out, 47 << outShift, ctimesminusplus(reverse(v3055), tbl[538 + tbloffset], ctimes(v3055, tbl[539 + tbloffset]))); real2 v3057 = minusplus(uminus(v3053), v3054); store(out, 111 << outShift, ctimesminusplus(reverse(v3057), tbl[540 + tbloffset], ctimes(v3057, tbl[541 + tbloffset]))); store(out, 15 << outShift, plus(v3058, v3059)); real2 v3072 = minus(v3058, v3059); store(out, 79 << outShift, ctimesminusplus(v3072, tbl[0 + tbloffset], ctimes(reverse(v3072), tbl[1 + tbloffset]))); real2 v683 = ctimesminusplus(reverse(v673), 
tbl[110 + tbloffset], ctimes(v673, tbl[111 + tbloffset])); real2 v363 = ctimesminusplus(reverse(v353), tbl[46 + tbloffset], ctimes(v353, tbl[47 + tbloffset])); real2 v2105 = plus(v363, v683); real2 v2099 = reverse(minus(v683, v363)); real2 v283 = ctimesminusplus(reverse(v273), tbl[30 + tbloffset], ctimes(v273, tbl[31 + tbloffset])); real2 v723 = ctimesminusplus(reverse(v713), tbl[118 + tbloffset], ctimes(v713, tbl[119 + tbloffset])); real2 v403 = ctimesminusplus(reverse(v393), tbl[54 + tbloffset], ctimes(v393, tbl[55 + tbloffset])); real2 v603 = ctimesminusplus(reverse(v593), tbl[94 + tbloffset], ctimes(v593, tbl[95 + tbloffset])); real2 v2180 = minus(v603, v283); real2 v2184 = plus(v283, v603); real2 v2145 = plus(v403, v723); real2 v2139 = reverse(minus(v723, v403)); real2 v543 = ctimesminusplus(reverse(v533), tbl[82 + tbloffset], ctimes(v533, tbl[83 + tbloffset])); real2 v383 = ctimesminusplus(reverse(v373), tbl[50 + tbloffset], ctimes(v373, tbl[51 + tbloffset])); real2 v703 = ctimesminusplus(reverse(v693), tbl[114 + tbloffset], ctimes(v693, tbl[115 + tbloffset])); real2 v2125 = plus(v383, v703); real2 v2119 = reverse(minus(v703, v383)); real2 v223 = ctimesminusplus(reverse(v213), tbl[18 + tbloffset], ctimes(v213, tbl[19 + tbloffset])); real2 v2120 = minus(v543, v223); real2 v2124 = plus(v223, v543); real2 v443 = ctimesminusplus(reverse(v433), tbl[62 + tbloffset], ctimes(v433, tbl[63 + tbloffset])); real2 v203 = ctimesminusplus(reverse(v193), tbl[14 + tbloffset], ctimes(v193, tbl[15 + tbloffset])); real2 v763 = ctimesminusplus(reverse(v753), tbl[126 + tbloffset], ctimes(v753, tbl[127 + tbloffset])); real2 v2179 = reverse(minus(v763, v443)); real2 v2185 = plus(v443, v763); real2 v523 = ctimesminusplus(reverse(v513), tbl[78 + tbloffset], ctimes(v513, tbl[79 + tbloffset])); real2 v2100 = minus(v523, v203); real2 v2104 = plus(v203, v523); real2 v2264 = plus(v2104, v2105); real2 v2260 = minus(v2105, v2104); real2 v643 = ctimesminusplus(reverse(v633), tbl[102 + 
tbloffset], ctimes(v633, tbl[103 + tbloffset])); real2 v2265 = plus(v2184, v2185); real2 v2259 = reverse(minus(v2185, v2184)); real2 v563 = ctimesminusplus(reverse(v553), tbl[86 + tbloffset], ctimes(v553, tbl[87 + tbloffset])); real2 v243 = ctimesminusplus(reverse(v233), tbl[22 + tbloffset], ctimes(v233, tbl[23 + tbloffset])); real2 v2144 = plus(v243, v563); real2 v2140 = minus(v563, v243); real2 v143 = ctimesminusplus(reverse(v133), tbl[2 + tbloffset], ctimes(v133, tbl[3 + tbloffset])); real2 v183 = ctimesminusplus(reverse(v173), tbl[10 + tbloffset], ctimes(v173, tbl[11 + tbloffset])); real2 v2084 = plus(v183, v503); real2 v2080 = minus(v503, v183); real2 v163 = ctimesminusplus(reverse(v153), tbl[6 + tbloffset], ctimes(v153, tbl[7 + tbloffset])); real2 v303 = ctimesminusplus(reverse(v293), tbl[34 + tbloffset], ctimes(v293, tbl[35 + tbloffset])); real2 v623 = ctimesminusplus(reverse(v613), tbl[98 + tbloffset], ctimes(v613, tbl[99 + tbloffset])); real2 v2039 = reverse(minus(v623, v303)); real2 v2045 = plus(v303, v623); real2 v463 = ctimesminusplus(reverse(v453), tbl[66 + tbloffset], ctimes(v453, tbl[67 + tbloffset])); real2 v2044 = plus(v143, v463); real2 v2040 = minus(v463, v143); real2 v2204 = plus(v2044, v2045); real2 v2200 = minus(v2045, v2044); real2 v323 = ctimesminusplus(reverse(v313), tbl[38 + tbloffset], ctimes(v313, tbl[39 + tbloffset])); real2 v2205 = plus(v2124, v2125); real2 v2199 = reverse(minus(v2125, v2124)); real2 v2280 = minus(v2205, v2204); real2 v2284 = plus(v2204, v2205); real2 v2225 = plus(v2144, v2145); real2 v2219 = reverse(minus(v2145, v2144)); real2 v2305 = plus(v2264, v2265); real2 v2299 = reverse(minus(v2265, v2264)); real2 v2240 = minus(v2085, v2084); real2 v2244 = plus(v2084, v2085); real2 v2279 = reverse(minus(v2245, v2244)); real2 v2285 = plus(v2244, v2245); real2 v2281 = minusplus(v2279, v2280); real2 v2283 = minusplus(uminus(v2279), v2280); real2 v2291 = ctimesminusplus(reverse(v2281), tbl[406 + tbloffset], ctimes(v2281, tbl[407 + 
tbloffset])); real2 v483 = ctimesminusplus(reverse(v473), tbl[70 + tbloffset], ctimes(v473, tbl[71 + tbloffset])); real2 v2060 = minus(v483, v163); real2 v2064 = plus(v163, v483); real2 v2065 = plus(v323, v643); real2 v2059 = reverse(minus(v643, v323)); real2 v2220 = minus(v2065, v2064); real2 v2224 = plus(v2064, v2065); real2 v2304 = plus(v2224, v2225); real2 v2300 = minus(v2225, v2224); real2 v2301 = minusplus(v2299, v2300); real2 v2303 = minusplus(uminus(v2299), v2300); real2 v2311 = ctimesminusplus(reverse(v2301), tbl[410 + tbloffset], ctimes(v2301, tbl[411 + tbloffset])); store(out, 17 << outShift, plus(v2291, v2311)); real2 v2344 = minus(v2291, v2311); store(out, 81 << outShift, ctimesminusplus(v2344, tbl[0 + tbloffset], ctimes(reverse(v2344), tbl[1 + tbloffset]))); real2 v2297 = ctimesminusplus(reverse(v2283), tbl[408 + tbloffset], ctimes(v2283, tbl[409 + tbloffset])); real2 v2317 = ctimesminusplus(reverse(v2303), tbl[412 + tbloffset], ctimes(v2303, tbl[413 + tbloffset])); store(out, 49 << outShift, plus(v2297, v2317)); real2 v2350 = minus(v2297, v2317); store(out, 113 << outShift, ctimesminusplus(v2350, tbl[0 + tbloffset], ctimes(reverse(v2350), tbl[1 + tbloffset]))); real2 v2320 = minus(v2285, v2284); real2 v2324 = plus(v2284, v2285); real2 v2325 = plus(v2304, v2305); real2 v2319 = reverse(minus(v2305, v2304)); store(out, 1 << outShift, plus(v2324, v2325)); real2 v2338 = minus(v2324, v2325); store(out, 65 << outShift, ctimesminusplus(v2338, tbl[0 + tbloffset], ctimes(reverse(v2338), tbl[1 + tbloffset]))); real2 v2321 = minusplus(v2319, v2320); store(out, 33 << outShift, ctimesminusplus(reverse(v2321), tbl[414 + tbloffset], ctimes(v2321, tbl[415 + tbloffset]))); real2 v2323 = minusplus(uminus(v2319), v2320); store(out, 97 << outShift, ctimesminusplus(reverse(v2323), tbl[416 + tbloffset], ctimes(v2323, tbl[417 + tbloffset]))); real2 v2201 = minusplus(v2199, v2200); real2 v2203 = minusplus(uminus(v2199), v2200); real2 v2263 = minusplus(uminus(v2259), v2260); 
real2 v2261 = minusplus(v2259, v2260); real2 v2243 = minusplus(uminus(v2239), v2240); real2 v2241 = minusplus(v2239, v2240); real2 v2257 = ctimesminusplus(reverse(v2243), tbl[400 + tbloffset], ctimes(v2243, tbl[401 + tbloffset])); real2 v2217 = ctimesminusplus(reverse(v2203), tbl[392 + tbloffset], ctimes(v2203, tbl[393 + tbloffset])); real2 v2388 = plus(v2217, v2257); real2 v2384 = minus(v2257, v2217); real2 v2277 = ctimesminusplus(reverse(v2263), tbl[404 + tbloffset], ctimes(v2263, tbl[405 + tbloffset])); real2 v2221 = minusplus(v2219, v2220); real2 v2223 = minusplus(uminus(v2219), v2220); real2 v2237 = ctimesminusplus(reverse(v2223), tbl[396 + tbloffset], ctimes(v2223, tbl[397 + tbloffset])); real2 v2389 = plus(v2237, v2277); real2 v2383 = reverse(minus(v2277, v2237)); store(out, 25 << outShift, plus(v2388, v2389)); real2 v2402 = minus(v2388, v2389); store(out, 89 << outShift, ctimesminusplus(v2402, tbl[0 + tbloffset], ctimes(reverse(v2402), tbl[1 + tbloffset]))); real2 v2385 = minusplus(v2383, v2384); real2 v2387 = minusplus(uminus(v2383), v2384); store(out, 121 << outShift, ctimesminusplus(reverse(v2387), tbl[424 + tbloffset], ctimes(v2387, tbl[425 + tbloffset]))); store(out, 57 << outShift, ctimesminusplus(reverse(v2385), tbl[422 + tbloffset], ctimes(v2385, tbl[423 + tbloffset]))); real2 v2251 = ctimesminusplus(reverse(v2241), tbl[398 + tbloffset], ctimes(v2241, tbl[399 + tbloffset])); real2 v2211 = ctimesminusplus(reverse(v2201), tbl[390 + tbloffset], ctimes(v2201, tbl[391 + tbloffset])); real2 v2358 = minus(v2251, v2211); real2 v2362 = plus(v2211, v2251); real2 v2271 = ctimesminusplus(reverse(v2261), tbl[402 + tbloffset], ctimes(v2261, tbl[403 + tbloffset])); real2 v2231 = ctimesminusplus(reverse(v2221), tbl[394 + tbloffset], ctimes(v2221, tbl[395 + tbloffset])); real2 v2357 = reverse(minus(v2271, v2231)); real2 v2363 = plus(v2231, v2271); store(out, 9 << outShift, plus(v2362, v2363)); real2 v2376 = minus(v2362, v2363); store(out, 73 << outShift, 
ctimesminusplus(v2376, tbl[0 + tbloffset], ctimes(reverse(v2376), tbl[1 + tbloffset]))); real2 v2361 = minusplus(uminus(v2357), v2358); store(out, 105 << outShift, ctimesminusplus(reverse(v2361), tbl[420 + tbloffset], ctimes(v2361, tbl[421 + tbloffset]))); real2 v2359 = minusplus(v2357, v2358); store(out, 41 << outShift, ctimesminusplus(reverse(v2359), tbl[418 + tbloffset], ctimes(v2359, tbl[419 + tbloffset]))); real2 v2121 = minusplus(v2119, v2120); real2 v2123 = minusplus(uminus(v2119), v2120); real2 v2083 = minusplus(uminus(v2079), v2080); real2 v2081 = minusplus(v2079, v2080); real2 v2091 = ctimesminusplus(reverse(v2081), tbl[366 + tbloffset], ctimes(v2081, tbl[367 + tbloffset])); real2 v2043 = minusplus(uminus(v2039), v2040); real2 v2041 = minusplus(v2039, v2040); real2 v2051 = ctimesminusplus(reverse(v2041), tbl[358 + tbloffset], ctimes(v2041, tbl[359 + tbloffset])); real2 v2131 = ctimesminusplus(reverse(v2121), tbl[374 + tbloffset], ctimes(v2121, tbl[375 + tbloffset])); real2 v2163 = minusplus(uminus(v2159), v2160); real2 v2161 = minusplus(v2159, v2160); real2 v2171 = ctimesminusplus(reverse(v2161), tbl[382 + tbloffset], ctimes(v2161, tbl[383 + tbloffset])); real2 v2409 = reverse(minus(v2171, v2091)); real2 v2415 = plus(v2091, v2171); real2 v2410 = minus(v2131, v2051); real2 v2414 = plus(v2051, v2131); real2 v2454 = plus(v2414, v2415); real2 v2450 = minus(v2415, v2414); real2 v2181 = minusplus(v2179, v2180); real2 v2183 = minusplus(uminus(v2179), v2180); real2 v2191 = ctimesminusplus(reverse(v2181), tbl[386 + tbloffset], ctimes(v2181, tbl[387 + tbloffset])); real2 v2103 = minusplus(uminus(v2099), v2100); real2 v2101 = minusplus(v2099, v2100); real2 v2111 = ctimesminusplus(reverse(v2101), tbl[370 + tbloffset], ctimes(v2101, tbl[371 + tbloffset])); real2 v2435 = plus(v2111, v2191); real2 v2429 = reverse(minus(v2191, v2111)); real2 v2141 = minusplus(v2139, v2140); real2 v2143 = minusplus(uminus(v2139), v2140); real2 v2151 = ctimesminusplus(reverse(v2141), 
tbl[378 + tbloffset], ctimes(v2141, tbl[379 + tbloffset])); real2 v2063 = minusplus(uminus(v2059), v2060); real2 v2061 = minusplus(v2059, v2060); real2 v2071 = ctimesminusplus(reverse(v2061), tbl[362 + tbloffset], ctimes(v2061, tbl[363 + tbloffset])); real2 v2434 = plus(v2071, v2151); real2 v2430 = minus(v2151, v2071); real2 v2455 = plus(v2434, v2435); real2 v2449 = reverse(minus(v2435, v2434)); store(out, 5 << outShift, plus(v2454, v2455)); real2 v2468 = minus(v2454, v2455); store(out, 69 << outShift, ctimesminusplus(v2468, tbl[0 + tbloffset], ctimes(reverse(v2468), tbl[1 + tbloffset]))); real2 v2451 = minusplus(v2449, v2450); real2 v2453 = minusplus(uminus(v2449), v2450); store(out, 101 << outShift, ctimesminusplus(reverse(v2453), tbl[436 + tbloffset], ctimes(v2453, tbl[437 + tbloffset]))); store(out, 37 << outShift, ctimesminusplus(reverse(v2451), tbl[434 + tbloffset], ctimes(v2451, tbl[435 + tbloffset]))); real2 v2411 = minusplus(v2409, v2410); real2 v2413 = minusplus(uminus(v2409), v2410); real2 v2433 = minusplus(uminus(v2429), v2430); real2 v2431 = minusplus(v2429, v2430); real2 v2421 = ctimesminusplus(reverse(v2411), tbl[426 + tbloffset], ctimes(v2411, tbl[427 + tbloffset])); real2 v2441 = ctimesminusplus(reverse(v2431), tbl[430 + tbloffset], ctimes(v2431, tbl[431 + tbloffset])); store(out, 21 << outShift, plus(v2421, v2441)); real2 v2474 = minus(v2421, v2441); store(out, 85 << outShift, ctimesminusplus(v2474, tbl[0 + tbloffset], ctimes(reverse(v2474), tbl[1 + tbloffset]))); real2 v2427 = ctimesminusplus(reverse(v2413), tbl[428 + tbloffset], ctimes(v2413, tbl[429 + tbloffset])); real2 v2447 = ctimesminusplus(reverse(v2433), tbl[432 + tbloffset], ctimes(v2433, tbl[433 + tbloffset])); store(out, 53 << outShift, plus(v2427, v2447)); real2 v2480 = minus(v2427, v2447); store(out, 117 << outShift, ctimesminusplus(v2480, tbl[0 + tbloffset], ctimes(reverse(v2480), tbl[1 + tbloffset]))); real2 v2057 = ctimesminusplus(reverse(v2043), tbl[360 + tbloffset], 
ctimes(v2043, tbl[361 + tbloffset])); real2 v2097 = ctimesminusplus(reverse(v2083), tbl[368 + tbloffset], ctimes(v2083, tbl[369 + tbloffset])); real2 v2157 = ctimesminusplus(reverse(v2143), tbl[380 + tbloffset], ctimes(v2143, tbl[381 + tbloffset])); real2 v2197 = ctimesminusplus(reverse(v2183), tbl[388 + tbloffset], ctimes(v2183, tbl[389 + tbloffset])); real2 v2117 = ctimesminusplus(reverse(v2103), tbl[372 + tbloffset], ctimes(v2103, tbl[373 + tbloffset])); real2 v2507 = reverse(minus(v2197, v2117)); real2 v2513 = plus(v2117, v2197); real2 v2137 = ctimesminusplus(reverse(v2123), tbl[376 + tbloffset], ctimes(v2123, tbl[377 + tbloffset])); real2 v2488 = minus(v2137, v2057); real2 v2492 = plus(v2057, v2137); real2 v2177 = ctimesminusplus(reverse(v2163), tbl[384 + tbloffset], ctimes(v2163, tbl[385 + tbloffset])); real2 v2493 = plus(v2097, v2177); real2 v2487 = reverse(minus(v2177, v2097)); real2 v2532 = plus(v2492, v2493); real2 v2528 = minus(v2493, v2492); real2 v2077 = ctimesminusplus(reverse(v2063), tbl[364 + tbloffset], ctimes(v2063, tbl[365 + tbloffset])); real2 v2512 = plus(v2077, v2157); real2 v2508 = minus(v2157, v2077); real2 v2527 = reverse(minus(v2513, v2512)); real2 v2533 = plus(v2512, v2513); real2 v2529 = minusplus(v2527, v2528); real2 v2531 = minusplus(uminus(v2527), v2528); store(out, 109 << outShift, ctimesminusplus(reverse(v2531), tbl[448 + tbloffset], ctimes(v2531, tbl[449 + tbloffset]))); store(out, 45 << outShift, ctimesminusplus(reverse(v2529), tbl[446 + tbloffset], ctimes(v2529, tbl[447 + tbloffset]))); store(out, 13 << outShift, plus(v2532, v2533)); real2 v2546 = minus(v2532, v2533); store(out, 77 << outShift, ctimesminusplus(v2546, tbl[0 + tbloffset], ctimes(reverse(v2546), tbl[1 + tbloffset]))); real2 v2509 = minusplus(v2507, v2508); real2 v2511 = minusplus(uminus(v2507), v2508); real2 v2491 = minusplus(uminus(v2487), v2488); real2 v2489 = minusplus(v2487, v2488); real2 v2499 = ctimesminusplus(reverse(v2489), tbl[438 + tbloffset], 
ctimes(v2489, tbl[439 + tbloffset])); real2 v2519 = ctimesminusplus(reverse(v2509), tbl[442 + tbloffset], ctimes(v2509, tbl[443 + tbloffset])); store(out, 29 << outShift, plus(v2499, v2519)); real2 v2552 = minus(v2499, v2519); store(out, 93 << outShift, ctimesminusplus(v2552, tbl[0 + tbloffset], ctimes(reverse(v2552), tbl[1 + tbloffset]))); real2 v2505 = ctimesminusplus(reverse(v2491), tbl[440 + tbloffset], ctimes(v2491, tbl[441 + tbloffset])); real2 v2525 = ctimesminusplus(reverse(v2511), tbl[444 + tbloffset], ctimes(v2511, tbl[445 + tbloffset])); store(out, 61 << outShift, plus(v2505, v2525)); real2 v2558 = minus(v2505, v2525); store(out, 125 << outShift, ctimesminusplus(v2558, tbl[0 + tbloffset], ctimes(reverse(v2558), tbl[1 + tbloffset]))); // Pres : 76263 } } ALIGNED(8192) void tbut128f_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; // Pres : 148586 real2 v56 = load(in, 54 << inShift); real2 v120 = load(in, 118 << inShift); real2 v571 = reverse(minus(v56, v120)); real2 v577 = plus(v56, v120); real2 v24 = load(in, 22 << inShift); real2 v88 = load(in, 86 << inShift); real2 v576 = plus(v24, v88); real2 v572 = minus(v88, v24); real2 v573 = minusplus(v571, v572); real2 v575 = minusplus(uminus(v571), v572); real2 v589 = timesminusplus(reverse(v575), load(tbl, 92 * VECWIDTH + tbloffset), times(v575, load(tbl, 93 * VECWIDTH + tbloffset))); real2 v583 = timesminusplus(reverse(v573), load(tbl, 90 * VECWIDTH + tbloffset), times(v573, load(tbl, 91 * VECWIDTH + tbloffset))); real2 v897 = plus(v576, v577); real2 v891 = reverse(minus(v576, v577)); real2 v8 = load(in, 6 << inShift); real2 v72 = load(in, 70 << inShift); real2 v252 = minus(v72, v8); real2 v256 = plus(v8, v72); real2 
v104 = load(in, 102 << inShift); real2 v40 = load(in, 38 << inShift); real2 v251 = reverse(minus(v40, v104)); real2 v257 = plus(v40, v104); real2 v255 = minusplus(uminus(v251), v252); real2 v253 = minusplus(v251, v252); real2 v263 = timesminusplus(reverse(v253), load(tbl, 26 * VECWIDTH + tbloffset), times(v253, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v896 = plus(v256, v257); real2 v892 = minus(v257, v256); real2 v895 = minusplus(uminus(v891), v892); real2 v893 = minusplus(v891, v892); real2 v909 = timesminusplus(reverse(v895), load(tbl, 156 * VECWIDTH + tbloffset), times(v895, load(tbl, 157 * VECWIDTH + tbloffset))); real2 v903 = timesminusplus(reverse(v893), load(tbl, 154 * VECWIDTH + tbloffset), times(v893, load(tbl, 155 * VECWIDTH + tbloffset))); real2 v269 = timesminusplus(reverse(v255), load(tbl, 28 * VECWIDTH + tbloffset), times(v255, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v1216 = plus(v896, v897); real2 v1212 = minus(v897, v896); real2 v2160 = minus(v583, v263); real2 v2164 = plus(v263, v583); real2 v2686 = minus(v589, v269); real2 v2690 = plus(v269, v589); real2 v96 = load(in, 94 << inShift); real2 v32 = load(in, 30 << inShift); real2 v736 = plus(v32, v96); real2 v732 = minus(v96, v32); real2 v64 = load(in, 62 << inShift); real2 v128 = load(in, 126 << inShift); real2 v737 = plus(v64, v128); real2 v731 = reverse(minus(v64, v128)); real2 v1057 = plus(v736, v737); real2 v1051 = reverse(minus(v736, v737)); real2 v733 = minusplus(v731, v732); real2 v735 = minusplus(uminus(v731), v732); real2 v749 = timesminusplus(reverse(v735), load(tbl, 124 * VECWIDTH + tbloffset), times(v735, load(tbl, 125 * VECWIDTH + tbloffset))); real2 v743 = timesminusplus(reverse(v733), load(tbl, 122 * VECWIDTH + tbloffset), times(v733, load(tbl, 123 * VECWIDTH + tbloffset))); real2 v16 = load(in, 14 << inShift); real2 v80 = load(in, 78 << inShift); real2 v412 = minus(v80, v16); real2 v416 = plus(v16, v80); real2 v112 = load(in, 110 << inShift); real2 v48 = load(in, 46 << 
inShift); real2 v417 = plus(v48, v112); real2 v411 = reverse(minus(v48, v112)); real2 v1056 = plus(v416, v417); real2 v1052 = minus(v417, v416); real2 v1055 = minusplus(uminus(v1051), v1052); real2 v1053 = minusplus(v1051, v1052); real2 v1063 = timesminusplus(reverse(v1053), load(tbl, 186 * VECWIDTH + tbloffset), times(v1053, load(tbl, 187 * VECWIDTH + tbloffset))); real2 v1665 = plus(v903, v1063); real2 v1659 = reverse(minus(v903, v1063)); real2 v1069 = timesminusplus(reverse(v1055), load(tbl, 188 * VECWIDTH + tbloffset), times(v1055, load(tbl, 189 * VECWIDTH + tbloffset))); real2 v1869 = reverse(minus(v909, v1069)); real2 v1875 = plus(v909, v1069); real2 v413 = minusplus(v411, v412); real2 v415 = minusplus(uminus(v411), v412); real2 v429 = timesminusplus(reverse(v415), load(tbl, 60 * VECWIDTH + tbloffset), times(v415, load(tbl, 61 * VECWIDTH + tbloffset))); real2 v1217 = plus(v1056, v1057); real2 v1211 = reverse(minus(v1056, v1057)); real2 v1297 = plus(v1216, v1217); real2 v1291 = reverse(minus(v1216, v1217)); real2 v2691 = plus(v429, v749); real2 v2685 = reverse(minus(v429, v749)); real2 v2765 = reverse(minus(v2690, v2691)); real2 v2771 = plus(v2690, v2691); real2 v2689 = minusplus(uminus(v2685), v2686); real2 v2687 = minusplus(v2685, v2686); real2 v2703 = timesminusplus(reverse(v2689), load(tbl, 476 * VECWIDTH + tbloffset), times(v2689, load(tbl, 477 * VECWIDTH + tbloffset))); real2 v2697 = timesminusplus(reverse(v2687), load(tbl, 474 * VECWIDTH + tbloffset), times(v2687, load(tbl, 475 * VECWIDTH + tbloffset))); real2 v1215 = minusplus(uminus(v1211), v1212); real2 v1213 = minusplus(v1211, v1212); real2 v1223 = timesminusplus(reverse(v1213), load(tbl, 218 * VECWIDTH + tbloffset), times(v1213, load(tbl, 219 * VECWIDTH + tbloffset))); real2 v1229 = timesminusplus(reverse(v1215), load(tbl, 220 * VECWIDTH + tbloffset), times(v1215, load(tbl, 221 * VECWIDTH + tbloffset))); real2 v423 = timesminusplus(reverse(v413), load(tbl, 58 * VECWIDTH + tbloffset), times(v413, 
load(tbl, 59 * VECWIDTH + tbloffset))); real2 v2165 = plus(v423, v743); real2 v2159 = reverse(minus(v423, v743)); real2 v2245 = plus(v2164, v2165); real2 v2239 = reverse(minus(v2164, v2165)); real2 v44 = load(in, 42 << inShift); real2 v108 = load(in, 106 << inShift); real2 v331 = reverse(minus(v44, v108)); real2 v337 = plus(v44, v108); real2 v76 = load(in, 74 << inShift); real2 v12 = load(in, 10 << inShift); real2 v336 = plus(v12, v76); real2 v332 = minus(v76, v12); real2 v976 = plus(v336, v337); real2 v972 = minus(v337, v336); real2 v335 = minusplus(uminus(v331), v332); real2 v333 = minusplus(v331, v332); real2 v343 = timesminusplus(reverse(v333), load(tbl, 42 * VECWIDTH + tbloffset), times(v333, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v349 = timesminusplus(reverse(v335), load(tbl, 44 * VECWIDTH + tbloffset), times(v335, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v124 = load(in, 122 << inShift); real2 v60 = load(in, 58 << inShift); real2 v651 = reverse(minus(v60, v124)); real2 v657 = plus(v60, v124); real2 v28 = load(in, 26 << inShift); real2 v92 = load(in, 90 << inShift); real2 v652 = minus(v92, v28); real2 v656 = plus(v28, v92); real2 v977 = plus(v656, v657); real2 v971 = reverse(minus(v656, v657)); real2 v973 = minusplus(v971, v972); real2 v975 = minusplus(uminus(v971), v972); real2 v983 = timesminusplus(reverse(v973), load(tbl, 170 * VECWIDTH + tbloffset), times(v973, load(tbl, 171 * VECWIDTH + tbloffset))); real2 v1131 = reverse(minus(v976, v977)); real2 v1137 = plus(v976, v977); real2 v655 = minusplus(uminus(v651), v652); real2 v653 = minusplus(v651, v652); real2 v669 = timesminusplus(reverse(v655), load(tbl, 108 * VECWIDTH + tbloffset), times(v655, load(tbl, 109 * VECWIDTH + tbloffset))); real2 v663 = timesminusplus(reverse(v653), load(tbl, 106 * VECWIDTH + tbloffset), times(v653, load(tbl, 107 * VECWIDTH + tbloffset))); real2 v2079 = reverse(minus(v343, v663)); real2 v2085 = plus(v343, v663); real2 v2605 = reverse(minus(v349, v669)); real2 v2611 = 
plus(v349, v669); real2 v989 = timesminusplus(reverse(v975), load(tbl, 172 * VECWIDTH + tbloffset), times(v975, load(tbl, 173 * VECWIDTH + tbloffset))); real2 v20 = load(in, 18 << inShift); real2 v84 = load(in, 82 << inShift); real2 v496 = plus(v20, v84); real2 v492 = minus(v84, v20); real2 v52 = load(in, 50 << inShift); real2 v116 = load(in, 114 << inShift); real2 v491 = reverse(minus(v52, v116)); real2 v497 = plus(v52, v116); real2 v817 = plus(v496, v497); real2 v811 = reverse(minus(v496, v497)); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v509 = timesminusplus(reverse(v495), load(tbl, 76 * VECWIDTH + tbloffset), times(v495, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v503 = timesminusplus(reverse(v493), load(tbl, 74 * VECWIDTH + tbloffset), times(v493, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v36 = load(in, 34 << inShift); real2 v100 = load(in, 98 << inShift); real2 v171 = reverse(minus(v36, v100)); real2 v177 = plus(v36, v100); real2 v68 = load(in, 66 << inShift); real2 v4 = load(in, 2 << inShift); real2 v176 = plus(v4, v68); real2 v172 = minus(v68, v4); real2 v816 = plus(v176, v177); real2 v812 = minus(v177, v176); real2 v1136 = plus(v816, v817); real2 v1132 = minus(v817, v816); real2 v1133 = minusplus(v1131, v1132); real2 v1135 = minusplus(uminus(v1131), v1132); real2 v1149 = timesminusplus(reverse(v1135), load(tbl, 204 * VECWIDTH + tbloffset), times(v1135, load(tbl, 205 * VECWIDTH + tbloffset))); real2 v1296 = plus(v1136, v1137); real2 v1292 = minus(v1137, v1136); real2 v1295 = minusplus(uminus(v1291), v1292); real2 v1293 = minusplus(v1291, v1292); real2 v1303 = timesminusplus(reverse(v1293), load(tbl, 234 * VECWIDTH + tbloffset), times(v1293, load(tbl, 235 * VECWIDTH + tbloffset))); real2 v1331 = reverse(minus(v1296, v1297)); real2 v1337 = plus(v1296, v1297); real2 v173 = minusplus(v171, v172); real2 v175 = minusplus(uminus(v171), v172); real2 v189 = timesminusplus(reverse(v175), load(tbl, 12 * VECWIDTH + 
tbloffset), times(v175, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v1309 = timesminusplus(reverse(v1295), load(tbl, 236 * VECWIDTH + tbloffset), times(v1295, load(tbl, 237 * VECWIDTH + tbloffset))); real2 v815 = minusplus(uminus(v811), v812); real2 v813 = minusplus(v811, v812); real2 v1143 = timesminusplus(reverse(v1133), load(tbl, 202 * VECWIDTH + tbloffset), times(v1133, load(tbl, 203 * VECWIDTH + tbloffset))); real2 v1541 = reverse(minus(v1149, v1229)); real2 v1547 = plus(v1149, v1229); real2 v2610 = plus(v189, v509); real2 v2606 = minus(v509, v189); real2 v2770 = plus(v2610, v2611); real2 v2766 = minus(v2611, v2610); real2 v823 = timesminusplus(reverse(v813), load(tbl, 138 * VECWIDTH + tbloffset), times(v813, load(tbl, 139 * VECWIDTH + tbloffset))); real2 v829 = timesminusplus(reverse(v815), load(tbl, 140 * VECWIDTH + tbloffset), times(v815, load(tbl, 141 * VECWIDTH + tbloffset))); real2 v2811 = plus(v2770, v2771); real2 v2805 = reverse(minus(v2770, v2771)); real2 v2767 = minusplus(v2765, v2766); real2 v2769 = minusplus(uminus(v2765), v2766); real2 v2607 = minusplus(v2605, v2606); real2 v2609 = minusplus(uminus(v2605), v2606); real2 v2617 = timesminusplus(reverse(v2607), load(tbl, 458 * VECWIDTH + tbloffset), times(v2607, load(tbl, 459 * VECWIDTH + tbloffset))); real2 v2623 = timesminusplus(reverse(v2609), load(tbl, 460 * VECWIDTH + tbloffset), times(v2609, load(tbl, 461 * VECWIDTH + tbloffset))); real2 v3013 = reverse(minus(v2623, v2703)); real2 v3019 = plus(v2623, v2703); real2 v2783 = timesminusplus(reverse(v2769), load(tbl, 492 * VECWIDTH + tbloffset), times(v2769, load(tbl, 493 * VECWIDTH + tbloffset))); real2 v2941 = plus(v2617, v2697); real2 v2935 = reverse(minus(v2617, v2697)); real2 v2777 = timesminusplus(reverse(v2767), load(tbl, 490 * VECWIDTH + tbloffset), times(v2767, load(tbl, 491 * VECWIDTH + tbloffset))); real2 v1660 = minus(v983, v823); real2 v1664 = plus(v823, v983); real2 v1874 = plus(v829, v989); real2 v1870 = minus(v989, v829); real2 
v1909 = reverse(minus(v1874, v1875)); real2 v1915 = plus(v1874, v1875); real2 v1663 = minusplus(uminus(v1659), v1660); real2 v1661 = minusplus(v1659, v1660); real2 v1677 = timesminusplus(reverse(v1663), load(tbl, 296 * VECWIDTH + tbloffset), times(v1663, load(tbl, 297 * VECWIDTH + tbloffset))); real2 v1873 = minusplus(uminus(v1869), v1870); real2 v1871 = minusplus(v1869, v1870); real2 v1887 = timesminusplus(reverse(v1873), load(tbl, 332 * VECWIDTH + tbloffset), times(v1873, load(tbl, 333 * VECWIDTH + tbloffset))); real2 v1705 = plus(v1664, v1665); real2 v1699 = reverse(minus(v1664, v1665)); real2 v1671 = timesminusplus(reverse(v1661), load(tbl, 294 * VECWIDTH + tbloffset), times(v1661, load(tbl, 295 * VECWIDTH + tbloffset))); real2 v1881 = timesminusplus(reverse(v1871), load(tbl, 330 * VECWIDTH + tbloffset), times(v1871, load(tbl, 331 * VECWIDTH + tbloffset))); real2 v1469 = plus(v1143, v1223); real2 v1463 = reverse(minus(v1143, v1223)); real2 v54 = load(in, 52 << inShift); real2 v118 = load(in, 116 << inShift); real2 v537 = plus(v54, v118); real2 v531 = reverse(minus(v54, v118)); real2 v86 = load(in, 84 << inShift); real2 v22 = load(in, 20 << inShift); real2 v536 = plus(v22, v86); real2 v532 = minus(v86, v22); real2 v851 = reverse(minus(v536, v537)); real2 v857 = plus(v536, v537); real2 v533 = minusplus(v531, v532); real2 v535 = minusplus(uminus(v531), v532); real2 v549 = timesminusplus(reverse(v535), load(tbl, 84 * VECWIDTH + tbloffset), times(v535, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v102 = load(in, 100 << inShift); real2 v38 = load(in, 36 << inShift); real2 v217 = plus(v38, v102); real2 v211 = reverse(minus(v38, v102)); real2 v70 = load(in, 68 << inShift); real2 v6 = load(in, 4 << inShift); real2 v216 = plus(v6, v70); real2 v212 = minus(v70, v6); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v229 = timesminusplus(reverse(v215), load(tbl, 20 * VECWIDTH + tbloffset), times(v215, load(tbl, 21 * VECWIDTH + 
tbloffset))); real2 v2646 = minus(v549, v229); real2 v2650 = plus(v229, v549); real2 v856 = plus(v216, v217); real2 v852 = minus(v217, v216); real2 v853 = minusplus(v851, v852); real2 v855 = minusplus(uminus(v851), v852); real2 v863 = timesminusplus(reverse(v853), load(tbl, 146 * VECWIDTH + tbloffset), times(v853, load(tbl, 147 * VECWIDTH + tbloffset))); real2 v869 = timesminusplus(reverse(v855), load(tbl, 148 * VECWIDTH + tbloffset), times(v855, load(tbl, 149 * VECWIDTH + tbloffset))); real2 v1176 = plus(v856, v857); real2 v1172 = minus(v857, v856); real2 v110 = load(in, 108 << inShift); real2 v46 = load(in, 44 << inShift); real2 v377 = plus(v46, v110); real2 v371 = reverse(minus(v46, v110)); real2 v78 = load(in, 76 << inShift); real2 v14 = load(in, 12 << inShift); real2 v372 = minus(v78, v14); real2 v376 = plus(v14, v78); real2 v1012 = minus(v377, v376); real2 v1016 = plus(v376, v377); real2 v373 = minusplus(v371, v372); real2 v375 = minusplus(uminus(v371), v372); real2 v389 = timesminusplus(reverse(v375), load(tbl, 52 * VECWIDTH + tbloffset), times(v375, load(tbl, 53 * VECWIDTH + tbloffset))); real2 v30 = load(in, 28 << inShift); real2 v94 = load(in, 92 << inShift); real2 v696 = plus(v30, v94); real2 v692 = minus(v94, v30); real2 v62 = load(in, 60 << inShift); real2 v126 = load(in, 124 << inShift); real2 v697 = plus(v62, v126); real2 v691 = reverse(minus(v62, v126)); real2 v1017 = plus(v696, v697); real2 v1011 = reverse(minus(v696, v697)); real2 v1171 = reverse(minus(v1016, v1017)); real2 v1177 = plus(v1016, v1017); real2 v1013 = minusplus(v1011, v1012); real2 v1015 = minusplus(uminus(v1011), v1012); real2 v1175 = minusplus(uminus(v1171), v1172); real2 v1173 = minusplus(v1171, v1172); real2 v1183 = timesminusplus(reverse(v1173), load(tbl, 210 * VECWIDTH + tbloffset), times(v1173, load(tbl, 211 * VECWIDTH + tbloffset))); real2 v1189 = timesminusplus(reverse(v1175), load(tbl, 212 * VECWIDTH + tbloffset), times(v1175, load(tbl, 213 * VECWIDTH + tbloffset))); real2 
v1029 = timesminusplus(reverse(v1015), load(tbl, 180 * VECWIDTH + tbloffset), times(v1015, load(tbl, 181 * VECWIDTH + tbloffset))); real2 v1023 = timesminusplus(reverse(v1013), load(tbl, 178 * VECWIDTH + tbloffset), times(v1013, load(tbl, 179 * VECWIDTH + tbloffset))); real2 v1625 = plus(v863, v1023); real2 v1619 = reverse(minus(v863, v1023)); real2 v1835 = plus(v869, v1029); real2 v1829 = reverse(minus(v869, v1029)); real2 v693 = minusplus(v691, v692); real2 v695 = minusplus(uminus(v691), v692); real2 v709 = timesminusplus(reverse(v695), load(tbl, 116 * VECWIDTH + tbloffset), times(v695, load(tbl, 117 * VECWIDTH + tbloffset))); real2 v2645 = reverse(minus(v389, v709)); real2 v2651 = plus(v389, v709); real2 v1257 = plus(v1176, v1177); real2 v1251 = reverse(minus(v1176, v1177)); real2 v2731 = plus(v2650, v2651); real2 v2725 = reverse(minus(v2650, v2651)); real2 v114 = load(in, 112 << inShift); real2 v50 = load(in, 48 << inShift); real2 v457 = plus(v50, v114); real2 v451 = reverse(minus(v50, v114)); real2 v18 = load(in, 16 << inShift); real2 v82 = load(in, 80 << inShift); real2 v456 = plus(v18, v82); real2 v452 = minus(v82, v18); real2 v771 = reverse(minus(v456, v457)); real2 v777 = plus(v456, v457); real2 v453 = minusplus(v451, v452); real2 v455 = minusplus(uminus(v451), v452); real2 v469 = timesminusplus(reverse(v455), load(tbl, 68 * VECWIDTH + tbloffset), times(v455, load(tbl, 69 * VECWIDTH + tbloffset))); real2 v66 = load(in, 64 << inShift); real2 v2 = load(in, 0 << inShift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); real2 v98 = load(in, 96 << inShift); real2 v34 = load(in, 32 << inShift); real2 v131 = reverse(minus(v34, v98)); real2 v137 = plus(v34, v98); real2 v133 = minusplus(v131, v132); real2 v135 = minusplus(uminus(v131), v132); real2 v149 = timesminusplus(reverse(v135), load(tbl, 4 * VECWIDTH + tbloffset), times(v135, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v2566 = minus(v469, v149); real2 v2570 = plus(v149, v469); real2 v772 = 
minus(v137, v136); real2 v776 = plus(v136, v137); real2 v1092 = minus(v777, v776); real2 v1096 = plus(v776, v777); real2 v773 = minusplus(v771, v772); real2 v775 = minusplus(uminus(v771), v772); real2 v783 = timesminusplus(reverse(v773), load(tbl, 130 * VECWIDTH + tbloffset), times(v773, load(tbl, 131 * VECWIDTH + tbloffset))); real2 v789 = timesminusplus(reverse(v775), load(tbl, 132 * VECWIDTH + tbloffset), times(v775, load(tbl, 133 * VECWIDTH + tbloffset))); real2 v74 = load(in, 72 << inShift); real2 v10 = load(in, 8 << inShift); real2 v296 = plus(v10, v74); real2 v292 = minus(v74, v10); real2 v42 = load(in, 40 << inShift); real2 v106 = load(in, 104 << inShift); real2 v291 = reverse(minus(v42, v106)); real2 v297 = plus(v42, v106); real2 v293 = minusplus(v291, v292); real2 v295 = minusplus(uminus(v291), v292); real2 v309 = timesminusplus(reverse(v295), load(tbl, 36 * VECWIDTH + tbloffset), times(v295, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v932 = minus(v297, v296); real2 v936 = plus(v296, v297); real2 v122 = load(in, 120 << inShift); real2 v58 = load(in, 56 << inShift); real2 v617 = plus(v58, v122); real2 v611 = reverse(minus(v58, v122)); real2 v26 = load(in, 24 << inShift); real2 v90 = load(in, 88 << inShift); real2 v612 = minus(v90, v26); real2 v616 = plus(v26, v90); real2 v937 = plus(v616, v617); real2 v931 = reverse(minus(v616, v617)); real2 v1091 = reverse(minus(v936, v937)); real2 v1097 = plus(v936, v937); real2 v933 = minusplus(v931, v932); real2 v935 = minusplus(uminus(v931), v932); real2 v1093 = minusplus(v1091, v1092); real2 v1095 = minusplus(uminus(v1091), v1092); real2 v1103 = timesminusplus(reverse(v1093), load(tbl, 194 * VECWIDTH + tbloffset), times(v1093, load(tbl, 195 * VECWIDTH + tbloffset))); real2 v1468 = plus(v1103, v1183); real2 v1464 = minus(v1183, v1103); real2 v1508 = plus(v1468, v1469); real2 v1504 = minus(v1469, v1468); real2 v1252 = minus(v1097, v1096); real2 v1256 = plus(v1096, v1097); real2 v1336 = plus(v1256, v1257); real2 
v1332 = minus(v1257, v1256); real2 v1335 = minusplus(uminus(v1331), v1332); real2 v1333 = minusplus(v1331, v1332); real2 v1343 = timesminusplus(reverse(v1333), load(tbl, 242 * VECWIDTH + tbloffset), times(v1333, load(tbl, 243 * VECWIDTH + tbloffset))); real2 v1349 = timesminusplus(reverse(v1335), load(tbl, 244 * VECWIDTH + tbloffset), times(v1335, load(tbl, 245 * VECWIDTH + tbloffset))); real2 v1376 = plus(v1336, v1337); real2 v1372 = minus(v1337, v1336); real2 v1465 = minusplus(v1463, v1464); real2 v1467 = minusplus(uminus(v1463), v1464); real2 v1255 = minusplus(uminus(v1251), v1252); real2 v1253 = minusplus(v1251, v1252); real2 v1481 = timesminusplus(reverse(v1467), load(tbl, 264 * VECWIDTH + tbloffset), times(v1467, load(tbl, 265 * VECWIDTH + tbloffset))); real2 v1475 = timesminusplus(reverse(v1465), load(tbl, 262 * VECWIDTH + tbloffset), times(v1465, load(tbl, 263 * VECWIDTH + tbloffset))); real2 v1109 = timesminusplus(reverse(v1095), load(tbl, 196 * VECWIDTH + tbloffset), times(v1095, load(tbl, 197 * VECWIDTH + tbloffset))); real2 v1542 = minus(v1189, v1109); real2 v1546 = plus(v1109, v1189); real2 v1545 = minusplus(uminus(v1541), v1542); real2 v1543 = minusplus(v1541, v1542); real2 v1553 = timesminusplus(reverse(v1543), load(tbl, 274 * VECWIDTH + tbloffset), times(v1543, load(tbl, 275 * VECWIDTH + tbloffset))); real2 v1559 = timesminusplus(reverse(v1545), load(tbl, 276 * VECWIDTH + tbloffset), times(v1545, load(tbl, 277 * VECWIDTH + tbloffset))); real2 v1582 = minus(v1547, v1546); real2 v1586 = plus(v1546, v1547); real2 v1269 = timesminusplus(reverse(v1255), load(tbl, 228 * VECWIDTH + tbloffset), times(v1255, load(tbl, 229 * VECWIDTH + tbloffset))); real2 v1438 = minus(v1309, v1269); real2 v1442 = plus(v1269, v1309); real2 v1263 = timesminusplus(reverse(v1253), load(tbl, 226 * VECWIDTH + tbloffset), times(v1253, load(tbl, 227 * VECWIDTH + tbloffset))); real2 v943 = timesminusplus(reverse(v933), load(tbl, 162 * VECWIDTH + tbloffset), times(v933, load(tbl, 163 
* VECWIDTH + tbloffset))); real2 v1624 = plus(v783, v943); real2 v1620 = minus(v943, v783); real2 v1623 = minusplus(uminus(v1619), v1620); real2 v1621 = minusplus(v1619, v1620); real2 v1700 = minus(v1625, v1624); real2 v1704 = plus(v1624, v1625); real2 v1631 = timesminusplus(reverse(v1621), load(tbl, 286 * VECWIDTH + tbloffset), times(v1621, load(tbl, 287 * VECWIDTH + tbloffset))); real2 v949 = timesminusplus(reverse(v935), load(tbl, 164 * VECWIDTH + tbloffset), times(v935, load(tbl, 165 * VECWIDTH + tbloffset))); real2 v1830 = minus(v949, v789); real2 v1834 = plus(v789, v949); real2 v1782 = plus(v1631, v1671); real2 v1778 = minus(v1671, v1631); real2 v1910 = minus(v1835, v1834); real2 v1914 = plus(v1834, v1835); real2 v1950 = minus(v1915, v1914); real2 v1954 = plus(v1914, v1915); real2 v1913 = minusplus(uminus(v1909), v1910); real2 v1911 = minusplus(v1909, v1910); real2 v613 = minusplus(v611, v612); real2 v615 = minusplus(uminus(v611), v612); real2 v629 = timesminusplus(reverse(v615), load(tbl, 100 * VECWIDTH + tbloffset), times(v615, load(tbl, 101 * VECWIDTH + tbloffset))); real2 v1744 = plus(v1704, v1705); real2 v1740 = minus(v1705, v1704); real2 v1637 = timesminusplus(reverse(v1623), load(tbl, 288 * VECWIDTH + tbloffset), times(v1623, load(tbl, 289 * VECWIDTH + tbloffset))); real2 v1927 = timesminusplus(reverse(v1913), load(tbl, 340 * VECWIDTH + tbloffset), times(v1913, load(tbl, 341 * VECWIDTH + tbloffset))); real2 v2571 = plus(v309, v629); real2 v2565 = reverse(minus(v309, v629)); real2 v1833 = minusplus(uminus(v1829), v1830); real2 v1831 = minusplus(v1829, v1830); real2 v1921 = timesminusplus(reverse(v1911), load(tbl, 338 * VECWIDTH + tbloffset), times(v1911, load(tbl, 339 * VECWIDTH + tbloffset))); real2 v1804 = minus(v1677, v1637); real2 v1808 = plus(v1637, v1677); real2 v1847 = timesminusplus(reverse(v1833), load(tbl, 324 * VECWIDTH + tbloffset), times(v1833, load(tbl, 325 * VECWIDTH + tbloffset))); real2 v2014 = minus(v1887, v1847); real2 v2018 = 
plus(v1847, v1887); real2 v1841 = timesminusplus(reverse(v1831), load(tbl, 322 * VECWIDTH + tbloffset), times(v1831, load(tbl, 323 * VECWIDTH + tbloffset))); real2 v1988 = minus(v1881, v1841); real2 v1992 = plus(v1841, v1881); real2 v1703 = minusplus(uminus(v1699), v1700); real2 v1701 = minusplus(v1699, v1700); real2 v1717 = timesminusplus(reverse(v1703), load(tbl, 304 * VECWIDTH + tbloffset), times(v1703, load(tbl, 305 * VECWIDTH + tbloffset))); real2 v1711 = timesminusplus(reverse(v1701), load(tbl, 302 * VECWIDTH + tbloffset), times(v1701, load(tbl, 303 * VECWIDTH + tbloffset))); real2 v2730 = plus(v2570, v2571); real2 v2726 = minus(v2571, v2570); real2 v1412 = minus(v1303, v1263); real2 v1416 = plus(v1263, v1303); real2 v63 = load(in, 61 << inShift); real2 v127 = load(in, 125 << inShift); real2 v717 = plus(v63, v127); real2 v711 = reverse(minus(v63, v127)); real2 v95 = load(in, 93 << inShift); real2 v31 = load(in, 29 << inShift); real2 v712 = minus(v95, v31); real2 v716 = plus(v31, v95); real2 v1037 = plus(v716, v717); real2 v1031 = reverse(minus(v716, v717)); real2 v79 = load(in, 77 << inShift); real2 v15 = load(in, 13 << inShift); real2 v396 = plus(v15, v79); real2 v392 = minus(v79, v15); real2 v111 = load(in, 109 << inShift); real2 v47 = load(in, 45 << inShift); real2 v397 = plus(v47, v111); real2 v391 = reverse(minus(v47, v111)); real2 v1032 = minus(v397, v396); real2 v1036 = plus(v396, v397); real2 v1033 = minusplus(v1031, v1032); real2 v1035 = minusplus(uminus(v1031), v1032); real2 v1049 = timesminusplus(reverse(v1035), load(tbl, 184 * VECWIDTH + tbloffset), times(v1035, load(tbl, 185 * VECWIDTH + tbloffset))); real2 v1043 = timesminusplus(reverse(v1033), load(tbl, 182 * VECWIDTH + tbloffset), times(v1033, load(tbl, 183 * VECWIDTH + tbloffset))); real2 v1197 = plus(v1036, v1037); real2 v1191 = reverse(minus(v1036, v1037)); real2 v23 = load(in, 21 << inShift); real2 v87 = load(in, 85 << inShift); real2 v556 = plus(v23, v87); real2 v552 = minus(v87, v23); 
real2 v119 = load(in, 117 << inShift); real2 v55 = load(in, 53 << inShift); real2 v557 = plus(v55, v119); real2 v551 = reverse(minus(v55, v119)); real2 v877 = plus(v556, v557); real2 v871 = reverse(minus(v556, v557)); real2 v7 = load(in, 5 << inShift); real2 v71 = load(in, 69 << inShift); real2 v232 = minus(v71, v7); real2 v236 = plus(v7, v71); real2 v103 = load(in, 101 << inShift); real2 v39 = load(in, 37 << inShift); real2 v237 = plus(v39, v103); real2 v231 = reverse(minus(v39, v103)); real2 v876 = plus(v236, v237); real2 v872 = minus(v237, v236); real2 v1192 = minus(v877, v876); real2 v1196 = plus(v876, v877); real2 v1271 = reverse(minus(v1196, v1197)); real2 v1277 = plus(v1196, v1197); real2 v875 = minusplus(uminus(v871), v872); real2 v873 = minusplus(v871, v872); real2 v883 = timesminusplus(reverse(v873), load(tbl, 150 * VECWIDTH + tbloffset), times(v873, load(tbl, 151 * VECWIDTH + tbloffset))); real2 v1639 = reverse(minus(v883, v1043)); real2 v1645 = plus(v883, v1043); real2 v1195 = minusplus(uminus(v1191), v1192); real2 v1193 = minusplus(v1191, v1192); real2 v1209 = timesminusplus(reverse(v1195), load(tbl, 216 * VECWIDTH + tbloffset), times(v1195, load(tbl, 217 * VECWIDTH + tbloffset))); real2 v1203 = timesminusplus(reverse(v1193), load(tbl, 214 * VECWIDTH + tbloffset), times(v1193, load(tbl, 215 * VECWIDTH + tbloffset))); real2 v83 = load(in, 81 << inShift); real2 v19 = load(in, 17 << inShift); real2 v476 = plus(v19, v83); real2 v472 = minus(v83, v19); real2 v51 = load(in, 49 << inShift); real2 v115 = load(in, 113 << inShift); real2 v477 = plus(v51, v115); real2 v471 = reverse(minus(v51, v115)); real2 v797 = plus(v476, v477); real2 v791 = reverse(minus(v476, v477)); real2 v3 = load(in, 1 << inShift); real2 v67 = load(in, 65 << inShift); real2 v156 = plus(v3, v67); real2 v152 = minus(v67, v3); real2 v35 = load(in, 33 << inShift); real2 v99 = load(in, 97 << inShift); real2 v157 = plus(v35, v99); real2 v151 = reverse(minus(v35, v99)); real2 v792 = minus(v157, 
v156); real2 v796 = plus(v156, v157); real2 v793 = minusplus(v791, v792); real2 v795 = minusplus(uminus(v791), v792); real2 v803 = timesminusplus(reverse(v793), load(tbl, 134 * VECWIDTH + tbloffset), times(v793, load(tbl, 135 * VECWIDTH + tbloffset))); real2 v1112 = minus(v797, v796); real2 v1116 = plus(v796, v797); real2 v107 = load(in, 105 << inShift); real2 v43 = load(in, 41 << inShift); real2 v317 = plus(v43, v107); real2 v311 = reverse(minus(v43, v107)); real2 v75 = load(in, 73 << inShift); real2 v11 = load(in, 9 << inShift); real2 v316 = plus(v11, v75); real2 v312 = minus(v75, v11); real2 v956 = plus(v316, v317); real2 v952 = minus(v317, v316); real2 v59 = load(in, 57 << inShift); real2 v123 = load(in, 121 << inShift); real2 v631 = reverse(minus(v59, v123)); real2 v637 = plus(v59, v123); real2 v27 = load(in, 25 << inShift); real2 v91 = load(in, 89 << inShift); real2 v636 = plus(v27, v91); real2 v632 = minus(v91, v27); real2 v957 = plus(v636, v637); real2 v951 = reverse(minus(v636, v637)); real2 v1111 = reverse(minus(v956, v957)); real2 v1117 = plus(v956, v957); real2 v1276 = plus(v1116, v1117); real2 v1272 = minus(v1117, v1116); real2 v1275 = minusplus(uminus(v1271), v1272); real2 v1273 = minusplus(v1271, v1272); real2 v1283 = timesminusplus(reverse(v1273), load(tbl, 230 * VECWIDTH + tbloffset), times(v1273, load(tbl, 231 * VECWIDTH + tbloffset))); real2 v1352 = minus(v1277, v1276); real2 v1356 = plus(v1276, v1277); real2 v1289 = timesminusplus(reverse(v1275), load(tbl, 232 * VECWIDTH + tbloffset), times(v1275, load(tbl, 233 * VECWIDTH + tbloffset))); real2 v1115 = minusplus(uminus(v1111), v1112); real2 v1113 = minusplus(v1111, v1112); real2 v1123 = timesminusplus(reverse(v1113), load(tbl, 198 * VECWIDTH + tbloffset), times(v1113, load(tbl, 199 * VECWIDTH + tbloffset))); real2 v1129 = timesminusplus(reverse(v1115), load(tbl, 200 * VECWIDTH + tbloffset), times(v1115, load(tbl, 201 * VECWIDTH + tbloffset))); real2 v1488 = plus(v1123, v1203); real2 v1484 = 
minus(v1203, v1123); real2 v1566 = plus(v1129, v1209); real2 v1562 = minus(v1209, v1129); real2 v85 = load(in, 83 << inShift); real2 v21 = load(in, 19 << inShift); real2 v512 = minus(v85, v21); real2 v516 = plus(v21, v85); real2 v117 = load(in, 115 << inShift); real2 v53 = load(in, 51 << inShift); real2 v517 = plus(v53, v117); real2 v511 = reverse(minus(v53, v117)); real2 v831 = reverse(minus(v516, v517)); real2 v837 = plus(v516, v517); real2 v69 = load(in, 67 << inShift); real2 v5 = load(in, 3 << inShift); real2 v192 = minus(v69, v5); real2 v196 = plus(v5, v69); real2 v37 = load(in, 35 << inShift); real2 v101 = load(in, 99 << inShift); real2 v197 = plus(v37, v101); real2 v191 = reverse(minus(v37, v101)); real2 v832 = minus(v197, v196); real2 v836 = plus(v196, v197); real2 v1152 = minus(v837, v836); real2 v1156 = plus(v836, v837); real2 v61 = load(in, 59 << inShift); real2 v125 = load(in, 123 << inShift); real2 v677 = plus(v61, v125); real2 v671 = reverse(minus(v61, v125)); real2 v29 = load(in, 27 << inShift); real2 v93 = load(in, 91 << inShift); real2 v672 = minus(v93, v29); real2 v676 = plus(v29, v93); real2 v997 = plus(v676, v677); real2 v991 = reverse(minus(v676, v677)); real2 v109 = load(in, 107 << inShift); real2 v45 = load(in, 43 << inShift); real2 v357 = plus(v45, v109); real2 v351 = reverse(minus(v45, v109)); real2 v77 = load(in, 75 << inShift); real2 v13 = load(in, 11 << inShift); real2 v352 = minus(v77, v13); real2 v356 = plus(v13, v77); real2 v992 = minus(v357, v356); real2 v996 = plus(v356, v357); real2 v1157 = plus(v996, v997); real2 v1151 = reverse(minus(v996, v997)); real2 v1155 = minusplus(uminus(v1151), v1152); real2 v1153 = minusplus(v1151, v1152); real2 v1163 = timesminusplus(reverse(v1153), load(tbl, 206 * VECWIDTH + tbloffset), times(v1153, load(tbl, 207 * VECWIDTH + tbloffset))); real2 v1316 = plus(v1156, v1157); real2 v1312 = minus(v1157, v1156); real2 v41 = load(in, 39 << inShift); real2 v105 = load(in, 103 << inShift); real2 v277 = 
plus(v41, v105); real2 v271 = reverse(minus(v41, v105)); real2 v9 = load(in, 7 << inShift); real2 v73 = load(in, 71 << inShift); real2 v276 = plus(v9, v73); real2 v272 = minus(v73, v9); real2 v916 = plus(v276, v277); real2 v912 = minus(v277, v276); real2 v89 = load(in, 87 << inShift); real2 v25 = load(in, 23 << inShift); real2 v592 = minus(v89, v25); real2 v596 = plus(v25, v89); real2 v57 = load(in, 55 << inShift); real2 v121 = load(in, 119 << inShift); real2 v591 = reverse(minus(v57, v121)); real2 v597 = plus(v57, v121); real2 v911 = reverse(minus(v596, v597)); real2 v917 = plus(v596, v597); real2 v1236 = plus(v916, v917); real2 v1232 = minus(v917, v916); real2 v81 = load(in, 79 << inShift); real2 v17 = load(in, 15 << inShift); real2 v432 = minus(v81, v17); real2 v436 = plus(v17, v81); real2 v113 = load(in, 111 << inShift); real2 v49 = load(in, 47 << inShift); real2 v437 = plus(v49, v113); real2 v431 = reverse(minus(v49, v113)); real2 v1072 = minus(v437, v436); real2 v1076 = plus(v436, v437); real2 v65 = load(in, 63 << inShift); real2 v129 = load(in, 127 << inShift); real2 v757 = plus(v65, v129); real2 v751 = reverse(minus(v65, v129)); real2 v97 = load(in, 95 << inShift); real2 v33 = load(in, 31 << inShift); real2 v752 = minus(v97, v33); real2 v756 = plus(v33, v97); real2 v1077 = plus(v756, v757); real2 v1071 = reverse(minus(v756, v757)); real2 v1231 = reverse(minus(v1076, v1077)); real2 v1237 = plus(v1076, v1077); real2 v1317 = plus(v1236, v1237); real2 v1311 = reverse(minus(v1236, v1237)); real2 v1351 = reverse(minus(v1316, v1317)); real2 v1357 = plus(v1316, v1317); real2 v1371 = reverse(minus(v1356, v1357)); real2 v1377 = plus(v1356, v1357); scatter(out, 0, 128, plus(v1376, v1377)); real2 v1390 = minus(v1376, v1377); scatter(out, 64, 128, timesminusplus(v1390, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1390), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1353 = minusplus(v1351, v1352); real2 v1355 = minusplus(uminus(v1351), v1352); real2 v1369 = 
timesminusplus(reverse(v1355), load(tbl, 248 * VECWIDTH + tbloffset), times(v1355, load(tbl, 249 * VECWIDTH + tbloffset))); scatter(out, 48, 128, plus(v1349, v1369)); real2 v1404 = minus(v1349, v1369); scatter(out, 112, 128, timesminusplus(v1404, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1404), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1363 = timesminusplus(reverse(v1353), load(tbl, 246 * VECWIDTH + tbloffset), times(v1353, load(tbl, 247 * VECWIDTH + tbloffset))); scatter(out, 16, 128, plus(v1343, v1363)); real2 v1398 = minus(v1343, v1363); scatter(out, 80, 128, timesminusplus(v1398, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1398), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1373 = minusplus(v1371, v1372); real2 v1375 = minusplus(uminus(v1371), v1372); scatter(out, 96, 128, timesminusplus(reverse(v1375), load(tbl, 252 * VECWIDTH + tbloffset), times(v1375, load(tbl, 253 * VECWIDTH + tbloffset)))); scatter(out, 32, 128, timesminusplus(reverse(v1373), load(tbl, 250 * VECWIDTH + tbloffset), times(v1373, load(tbl, 251 * VECWIDTH + tbloffset)))); real2 v1313 = minusplus(v1311, v1312); real2 v1315 = minusplus(uminus(v1311), v1312); real2 v1323 = timesminusplus(reverse(v1313), load(tbl, 238 * VECWIDTH + tbloffset), times(v1313, load(tbl, 239 * VECWIDTH + tbloffset))); real2 v1417 = plus(v1283, v1323); real2 v1411 = reverse(minus(v1283, v1323)); scatter(out, 8, 128, plus(v1416, v1417)); real2 v1430 = minus(v1416, v1417); scatter(out, 72, 128, timesminusplus(v1430, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1430), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1413 = minusplus(v1411, v1412); real2 v1415 = minusplus(uminus(v1411), v1412); scatter(out, 104, 128, timesminusplus(reverse(v1415), load(tbl, 256 * VECWIDTH + tbloffset), times(v1415, load(tbl, 257 * VECWIDTH + tbloffset)))); scatter(out, 40, 128, timesminusplus(reverse(v1413), load(tbl, 254 * VECWIDTH + tbloffset), times(v1413, load(tbl, 255 * VECWIDTH + tbloffset)))); real2 v1329 
= timesminusplus(reverse(v1315), load(tbl, 240 * VECWIDTH + tbloffset), times(v1315, load(tbl, 241 * VECWIDTH + tbloffset))); real2 v1443 = plus(v1289, v1329); real2 v1437 = reverse(minus(v1289, v1329)); scatter(out, 24, 128, plus(v1442, v1443)); real2 v1456 = minus(v1442, v1443); scatter(out, 88, 128, timesminusplus(v1456, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1456), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1441 = minusplus(uminus(v1437), v1438); real2 v1439 = minusplus(v1437, v1438); scatter(out, 120, 128, timesminusplus(reverse(v1441), load(tbl, 260 * VECWIDTH + tbloffset), times(v1441, load(tbl, 261 * VECWIDTH + tbloffset)))); scatter(out, 56, 128, timesminusplus(reverse(v1439), load(tbl, 258 * VECWIDTH + tbloffset), times(v1439, load(tbl, 259 * VECWIDTH + tbloffset)))); real2 v1235 = minusplus(uminus(v1231), v1232); real2 v1233 = minusplus(v1231, v1232); real2 v1243 = timesminusplus(reverse(v1233), load(tbl, 222 * VECWIDTH + tbloffset), times(v1233, load(tbl, 223 * VECWIDTH + tbloffset))); real2 v1489 = plus(v1163, v1243); real2 v1483 = reverse(minus(v1163, v1243)); real2 v1509 = plus(v1488, v1489); real2 v1503 = reverse(minus(v1488, v1489)); scatter(out, 4, 128, plus(v1508, v1509)); real2 v1522 = minus(v1508, v1509); scatter(out, 68, 128, timesminusplus(v1522, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1522), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1507 = minusplus(uminus(v1503), v1504); real2 v1505 = minusplus(v1503, v1504); scatter(out, 36, 128, timesminusplus(reverse(v1505), load(tbl, 270 * VECWIDTH + tbloffset), times(v1505, load(tbl, 271 * VECWIDTH + tbloffset)))); scatter(out, 100, 128, timesminusplus(reverse(v1507), load(tbl, 272 * VECWIDTH + tbloffset), times(v1507, load(tbl, 273 * VECWIDTH + tbloffset)))); real2 v1485 = minusplus(v1483, v1484); real2 v1487 = minusplus(uminus(v1483), v1484); real2 v1501 = timesminusplus(reverse(v1487), load(tbl, 268 * VECWIDTH + tbloffset), times(v1487, load(tbl, 269 * VECWIDTH + 
tbloffset))); scatter(out, 52, 128, plus(v1481, v1501)); real2 v1534 = minus(v1481, v1501); scatter(out, 116, 128, timesminusplus(v1534, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1534), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1495 = timesminusplus(reverse(v1485), load(tbl, 266 * VECWIDTH + tbloffset), times(v1485, load(tbl, 267 * VECWIDTH + tbloffset))); scatter(out, 20, 128, plus(v1475, v1495)); real2 v1528 = minus(v1475, v1495); scatter(out, 84, 128, timesminusplus(v1528, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1528), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1249 = timesminusplus(reverse(v1235), load(tbl, 224 * VECWIDTH + tbloffset), times(v1235, load(tbl, 225 * VECWIDTH + tbloffset))); real2 v1169 = timesminusplus(reverse(v1155), load(tbl, 208 * VECWIDTH + tbloffset), times(v1155, load(tbl, 209 * VECWIDTH + tbloffset))); real2 v1567 = plus(v1169, v1249); real2 v1561 = reverse(minus(v1169, v1249)); real2 v1581 = reverse(minus(v1566, v1567)); real2 v1587 = plus(v1566, v1567); scatter(out, 12, 128, plus(v1586, v1587)); real2 v1600 = minus(v1586, v1587); scatter(out, 76, 128, timesminusplus(v1600, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1600), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1583 = minusplus(v1581, v1582); scatter(out, 44, 128, timesminusplus(reverse(v1583), load(tbl, 282 * VECWIDTH + tbloffset), times(v1583, load(tbl, 283 * VECWIDTH + tbloffset)))); real2 v1585 = minusplus(uminus(v1581), v1582); scatter(out, 108, 128, timesminusplus(reverse(v1585), load(tbl, 284 * VECWIDTH + tbloffset), times(v1585, load(tbl, 285 * VECWIDTH + tbloffset)))); real2 v1565 = minusplus(uminus(v1561), v1562); real2 v1563 = minusplus(v1561, v1562); real2 v1579 = timesminusplus(reverse(v1565), load(tbl, 280 * VECWIDTH + tbloffset), times(v1565, load(tbl, 281 * VECWIDTH + tbloffset))); scatter(out, 60, 128, plus(v1559, v1579)); real2 v1612 = minus(v1559, v1579); scatter(out, 124, 128, timesminusplus(v1612, load(tbl, 0 * VECWIDTH 
+ tbloffset), times(reverse(v1612), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1573 = timesminusplus(reverse(v1563), load(tbl, 278 * VECWIDTH + tbloffset), times(v1563, load(tbl, 279 * VECWIDTH + tbloffset))); scatter(out, 28, 128, plus(v1553, v1573)); real2 v1606 = minus(v1553, v1573); scatter(out, 92, 128, timesminusplus(v1606, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1606), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v833 = minusplus(v831, v832); real2 v835 = minusplus(uminus(v831), v832); real2 v955 = minusplus(uminus(v951), v952); real2 v953 = minusplus(v951, v952); real2 v963 = timesminusplus(reverse(v953), load(tbl, 166 * VECWIDTH + tbloffset), times(v953, load(tbl, 167 * VECWIDTH + tbloffset))); real2 v995 = minusplus(uminus(v991), v992); real2 v993 = minusplus(v991, v992); real2 v1003 = timesminusplus(reverse(v993), load(tbl, 174 * VECWIDTH + tbloffset), times(v993, load(tbl, 175 * VECWIDTH + tbloffset))); real2 v843 = timesminusplus(reverse(v833), load(tbl, 142 * VECWIDTH + tbloffset), times(v833, load(tbl, 143 * VECWIDTH + tbloffset))); real2 v1640 = minus(v963, v803); real2 v1644 = plus(v803, v963); real2 v1680 = minus(v1003, v843); real2 v1684 = plus(v843, v1003); real2 v1641 = minusplus(v1639, v1640); real2 v1643 = minusplus(uminus(v1639), v1640); real2 v1657 = timesminusplus(reverse(v1643), load(tbl, 292 * VECWIDTH + tbloffset), times(v1643, load(tbl, 293 * VECWIDTH + tbloffset))); real2 v913 = minusplus(v911, v912); real2 v915 = minusplus(uminus(v911), v912); real2 v1073 = minusplus(v1071, v1072); real2 v1075 = minusplus(uminus(v1071), v1072); real2 v923 = timesminusplus(reverse(v913), load(tbl, 158 * VECWIDTH + tbloffset), times(v913, load(tbl, 159 * VECWIDTH + tbloffset))); real2 v1083 = timesminusplus(reverse(v1073), load(tbl, 190 * VECWIDTH + tbloffset), times(v1073, load(tbl, 191 * VECWIDTH + tbloffset))); real2 v1685 = plus(v923, v1083); real2 v1679 = reverse(minus(v923, v1083)); real2 v1681 = minusplus(v1679, v1680); real2 
v1683 = minusplus(uminus(v1679), v1680); real2 v1697 = timesminusplus(reverse(v1683), load(tbl, 300 * VECWIDTH + tbloffset), times(v1683, load(tbl, 301 * VECWIDTH + tbloffset))); real2 v1809 = plus(v1657, v1697); real2 v1803 = reverse(minus(v1657, v1697)); scatter(out, 26, 128, plus(v1808, v1809)); real2 v1822 = minus(v1808, v1809); scatter(out, 90, 128, timesminusplus(v1822, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1822), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1807 = minusplus(uminus(v1803), v1804); real2 v1805 = minusplus(v1803, v1804); scatter(out, 58, 128, timesminusplus(reverse(v1805), load(tbl, 318 * VECWIDTH + tbloffset), times(v1805, load(tbl, 319 * VECWIDTH + tbloffset)))); scatter(out, 122, 128, timesminusplus(reverse(v1807), load(tbl, 320 * VECWIDTH + tbloffset), times(v1807, load(tbl, 321 * VECWIDTH + tbloffset)))); real2 v1651 = timesminusplus(reverse(v1641), load(tbl, 290 * VECWIDTH + tbloffset), times(v1641, load(tbl, 291 * VECWIDTH + tbloffset))); real2 v1691 = timesminusplus(reverse(v1681), load(tbl, 298 * VECWIDTH + tbloffset), times(v1681, load(tbl, 299 * VECWIDTH + tbloffset))); real2 v1783 = plus(v1651, v1691); real2 v1777 = reverse(minus(v1651, v1691)); real2 v1779 = minusplus(v1777, v1778); real2 v1781 = minusplus(uminus(v1777), v1778); scatter(out, 106, 128, timesminusplus(reverse(v1781), load(tbl, 316 * VECWIDTH + tbloffset), times(v1781, load(tbl, 317 * VECWIDTH + tbloffset)))); scatter(out, 42, 128, timesminusplus(reverse(v1779), load(tbl, 314 * VECWIDTH + tbloffset), times(v1779, load(tbl, 315 * VECWIDTH + tbloffset)))); scatter(out, 10, 128, plus(v1782, v1783)); real2 v1796 = minus(v1782, v1783); scatter(out, 74, 128, timesminusplus(v1796, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1796), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1720 = minus(v1645, v1644); real2 v1724 = plus(v1644, v1645); real2 v1719 = reverse(minus(v1684, v1685)); real2 v1725 = plus(v1684, v1685); real2 v1745 = plus(v1724, v1725); 
real2 v1739 = reverse(minus(v1724, v1725)); scatter(out, 2, 128, plus(v1744, v1745)); real2 v1758 = minus(v1744, v1745); scatter(out, 66, 128, timesminusplus(v1758, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1758), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1741 = minusplus(v1739, v1740); real2 v1743 = minusplus(uminus(v1739), v1740); scatter(out, 98, 128, timesminusplus(reverse(v1743), load(tbl, 312 * VECWIDTH + tbloffset), times(v1743, load(tbl, 313 * VECWIDTH + tbloffset)))); scatter(out, 34, 128, timesminusplus(reverse(v1741), load(tbl, 310 * VECWIDTH + tbloffset), times(v1741, load(tbl, 311 * VECWIDTH + tbloffset)))); real2 v1723 = minusplus(uminus(v1719), v1720); real2 v1721 = minusplus(v1719, v1720); real2 v1737 = timesminusplus(reverse(v1723), load(tbl, 308 * VECWIDTH + tbloffset), times(v1723, load(tbl, 309 * VECWIDTH + tbloffset))); scatter(out, 50, 128, plus(v1717, v1737)); real2 v1770 = minus(v1717, v1737); scatter(out, 114, 128, timesminusplus(v1770, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1770), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1731 = timesminusplus(reverse(v1721), load(tbl, 306 * VECWIDTH + tbloffset), times(v1721, load(tbl, 307 * VECWIDTH + tbloffset))); scatter(out, 18, 128, plus(v1711, v1731)); real2 v1764 = minus(v1711, v1731); scatter(out, 82, 128, timesminusplus(v1764, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1764), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v809 = timesminusplus(reverse(v795), load(tbl, 136 * VECWIDTH + tbloffset), times(v795, load(tbl, 137 * VECWIDTH + tbloffset))); real2 v969 = timesminusplus(reverse(v955), load(tbl, 168 * VECWIDTH + tbloffset), times(v955, load(tbl, 169 * VECWIDTH + tbloffset))); real2 v1850 = minus(v969, v809); real2 v1854 = plus(v809, v969); real2 v849 = timesminusplus(reverse(v835), load(tbl, 144 * VECWIDTH + tbloffset), times(v835, load(tbl, 145 * VECWIDTH + tbloffset))); real2 v929 = timesminusplus(reverse(v915), load(tbl, 160 * VECWIDTH + 
tbloffset), times(v915, load(tbl, 161 * VECWIDTH + tbloffset))); real2 v889 = timesminusplus(reverse(v875), load(tbl, 152 * VECWIDTH + tbloffset), times(v875, load(tbl, 153 * VECWIDTH + tbloffset))); real2 v1089 = timesminusplus(reverse(v1075), load(tbl, 192 * VECWIDTH + tbloffset), times(v1075, load(tbl, 193 * VECWIDTH + tbloffset))); real2 v1009 = timesminusplus(reverse(v995), load(tbl, 176 * VECWIDTH + tbloffset), times(v995, load(tbl, 177 * VECWIDTH + tbloffset))); real2 v1890 = minus(v1009, v849); real2 v1894 = plus(v849, v1009); real2 v1849 = reverse(minus(v889, v1049)); real2 v1855 = plus(v889, v1049); real2 v1930 = minus(v1855, v1854); real2 v1934 = plus(v1854, v1855); real2 v1895 = plus(v929, v1089); real2 v1889 = reverse(minus(v929, v1089)); real2 v1929 = reverse(minus(v1894, v1895)); real2 v1935 = plus(v1894, v1895); real2 v1955 = plus(v1934, v1935); real2 v1949 = reverse(minus(v1934, v1935)); scatter(out, 6, 128, plus(v1954, v1955)); real2 v1968 = minus(v1954, v1955); scatter(out, 70, 128, timesminusplus(v1968, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1968), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1951 = minusplus(v1949, v1950); scatter(out, 38, 128, timesminusplus(reverse(v1951), load(tbl, 346 * VECWIDTH + tbloffset), times(v1951, load(tbl, 347 * VECWIDTH + tbloffset)))); real2 v1953 = minusplus(uminus(v1949), v1950); scatter(out, 102, 128, timesminusplus(reverse(v1953), load(tbl, 348 * VECWIDTH + tbloffset), times(v1953, load(tbl, 349 * VECWIDTH + tbloffset)))); real2 v1931 = minusplus(v1929, v1930); real2 v1933 = minusplus(uminus(v1929), v1930); real2 v1947 = timesminusplus(reverse(v1933), load(tbl, 344 * VECWIDTH + tbloffset), times(v1933, load(tbl, 345 * VECWIDTH + tbloffset))); scatter(out, 54, 128, plus(v1927, v1947)); real2 v1980 = minus(v1927, v1947); scatter(out, 118, 128, timesminusplus(v1980, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1980), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1941 = 
timesminusplus(reverse(v1931), load(tbl, 342 * VECWIDTH + tbloffset), times(v1931, load(tbl, 343 * VECWIDTH + tbloffset))); scatter(out, 22, 128, plus(v1921, v1941)); real2 v1974 = minus(v1921, v1941); scatter(out, 86, 128, timesminusplus(v1974, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1974), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1851 = minusplus(v1849, v1850); real2 v1853 = minusplus(uminus(v1849), v1850); real2 v1867 = timesminusplus(reverse(v1853), load(tbl, 328 * VECWIDTH + tbloffset), times(v1853, load(tbl, 329 * VECWIDTH + tbloffset))); real2 v1891 = minusplus(v1889, v1890); real2 v1893 = minusplus(uminus(v1889), v1890); real2 v1907 = timesminusplus(reverse(v1893), load(tbl, 336 * VECWIDTH + tbloffset), times(v1893, load(tbl, 337 * VECWIDTH + tbloffset))); real2 v2019 = plus(v1867, v1907); real2 v2013 = reverse(minus(v1867, v1907)); scatter(out, 30, 128, plus(v2018, v2019)); real2 v2032 = minus(v2018, v2019); scatter(out, 94, 128, timesminusplus(v2032, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2032), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2017 = minusplus(uminus(v2013), v2014); scatter(out, 126, 128, timesminusplus(reverse(v2017), load(tbl, 356 * VECWIDTH + tbloffset), times(v2017, load(tbl, 357 * VECWIDTH + tbloffset)))); real2 v2015 = minusplus(v2013, v2014); scatter(out, 62, 128, timesminusplus(reverse(v2015), load(tbl, 354 * VECWIDTH + tbloffset), times(v2015, load(tbl, 355 * VECWIDTH + tbloffset)))); real2 v1861 = timesminusplus(reverse(v1851), load(tbl, 326 * VECWIDTH + tbloffset), times(v1851, load(tbl, 327 * VECWIDTH + tbloffset))); real2 v1901 = timesminusplus(reverse(v1891), load(tbl, 334 * VECWIDTH + tbloffset), times(v1891, load(tbl, 335 * VECWIDTH + tbloffset))); real2 v1993 = plus(v1861, v1901); real2 v1987 = reverse(minus(v1861, v1901)); scatter(out, 14, 128, plus(v1992, v1993)); real2 v2006 = minus(v1992, v1993); scatter(out, 78, 128, timesminusplus(v2006, load(tbl, 0 * VECWIDTH + tbloffset), 
times(reverse(v2006), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1991 = minusplus(uminus(v1987), v1988); scatter(out, 110, 128, timesminusplus(reverse(v1991), load(tbl, 352 * VECWIDTH + tbloffset), times(v1991, load(tbl, 353 * VECWIDTH + tbloffset)))); real2 v1989 = minusplus(v1987, v1988); scatter(out, 46, 128, timesminusplus(reverse(v1989), load(tbl, 350 * VECWIDTH + tbloffset), times(v1989, load(tbl, 351 * VECWIDTH + tbloffset)))); real2 v593 = minusplus(v591, v592); real2 v595 = minusplus(uminus(v591), v592); real2 v473 = minusplus(v471, v472); real2 v475 = minusplus(uminus(v471), v472); real2 v555 = minusplus(uminus(v551), v552); real2 v553 = minusplus(v551, v552); real2 v609 = timesminusplus(reverse(v595), load(tbl, 96 * VECWIDTH + tbloffset), times(v595, load(tbl, 97 * VECWIDTH + tbloffset))); real2 v195 = minusplus(uminus(v191), v192); real2 v193 = minusplus(v191, v192); real2 v275 = minusplus(uminus(v271), v272); real2 v273 = minusplus(v271, v272); real2 v673 = minusplus(v671, v672); real2 v675 = minusplus(uminus(v671), v672); real2 v689 = timesminusplus(reverse(v675), load(tbl, 112 * VECWIDTH + tbloffset), times(v675, load(tbl, 113 * VECWIDTH + tbloffset))); real2 v209 = timesminusplus(reverse(v195), load(tbl, 16 * VECWIDTH + tbloffset), times(v195, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v289 = timesminusplus(reverse(v275), load(tbl, 32 * VECWIDTH + tbloffset), times(v275, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v755 = minusplus(uminus(v751), v752); real2 v753 = minusplus(v751, v752); real2 v435 = minusplus(uminus(v431), v432); real2 v433 = minusplus(v431, v432); real2 v513 = minusplus(v511, v512); real2 v515 = minusplus(uminus(v511), v512); real2 v529 = timesminusplus(reverse(v515), load(tbl, 80 * VECWIDTH + tbloffset), times(v515, load(tbl, 81 * VECWIDTH + tbloffset))); real2 v353 = minusplus(v351, v352); real2 v355 = minusplus(uminus(v351), v352); real2 v369 = timesminusplus(reverse(v355), load(tbl, 48 * VECWIDTH + tbloffset), 
times(v355, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v2631 = plus(v369, v689); real2 v2625 = reverse(minus(v369, v689)); real2 v449 = timesminusplus(reverse(v435), load(tbl, 64 * VECWIDTH + tbloffset), times(v435, load(tbl, 65 * VECWIDTH + tbloffset))); real2 v2710 = plus(v289, v609); real2 v2706 = minus(v609, v289); real2 v2630 = plus(v209, v529); real2 v2626 = minus(v529, v209); real2 v2790 = plus(v2630, v2631); real2 v2786 = minus(v2631, v2630); real2 v713 = minusplus(v711, v712); real2 v715 = minusplus(uminus(v711), v712); real2 v769 = timesminusplus(reverse(v755), load(tbl, 128 * VECWIDTH + tbloffset), times(v755, load(tbl, 129 * VECWIDTH + tbloffset))); real2 v2705 = reverse(minus(v449, v769)); real2 v2711 = plus(v449, v769); real2 v313 = minusplus(v311, v312); real2 v315 = minusplus(uminus(v311), v312); real2 v393 = minusplus(v391, v392); real2 v395 = minusplus(uminus(v391), v392); real2 v409 = timesminusplus(reverse(v395), load(tbl, 56 * VECWIDTH + tbloffset), times(v395, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v729 = timesminusplus(reverse(v715), load(tbl, 120 * VECWIDTH + tbloffset), times(v715, load(tbl, 121 * VECWIDTH + tbloffset))); real2 v329 = timesminusplus(reverse(v315), load(tbl, 40 * VECWIDTH + tbloffset), times(v315, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v489 = timesminusplus(reverse(v475), load(tbl, 72 * VECWIDTH + tbloffset), times(v475, load(tbl, 73 * VECWIDTH + tbloffset))); real2 v153 = minusplus(v151, v152); real2 v155 = minusplus(uminus(v151), v152); real2 v169 = timesminusplus(reverse(v155), load(tbl, 8 * VECWIDTH + tbloffset), times(v155, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v2586 = minus(v489, v169); real2 v2590 = plus(v169, v489); real2 v233 = minusplus(v231, v232); real2 v235 = minusplus(uminus(v231), v232); real2 v633 = minusplus(v631, v632); real2 v635 = minusplus(uminus(v631), v632); real2 v649 = timesminusplus(reverse(v635), load(tbl, 104 * VECWIDTH + tbloffset), times(v635, load(tbl, 105 * VECWIDTH + 
tbloffset))); real2 v249 = timesminusplus(reverse(v235), load(tbl, 24 * VECWIDTH + tbloffset), times(v235, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v569 = timesminusplus(reverse(v555), load(tbl, 88 * VECWIDTH + tbloffset), times(v555, load(tbl, 89 * VECWIDTH + tbloffset))); real2 v2670 = plus(v249, v569); real2 v2666 = minus(v569, v249); real2 v2785 = reverse(minus(v2710, v2711)); real2 v2791 = plus(v2710, v2711); real2 v2825 = reverse(minus(v2790, v2791)); real2 v2831 = plus(v2790, v2791); real2 v2671 = plus(v409, v729); real2 v2665 = reverse(minus(v409, v729)); real2 v2745 = reverse(minus(v2670, v2671)); real2 v2751 = plus(v2670, v2671); real2 v2806 = minus(v2731, v2730); real2 v2810 = plus(v2730, v2731); real2 v2846 = minus(v2811, v2810); real2 v2850 = plus(v2810, v2811); real2 v2591 = plus(v329, v649); real2 v2585 = reverse(minus(v329, v649)); real2 v2750 = plus(v2590, v2591); real2 v2746 = minus(v2591, v2590); real2 v2830 = plus(v2750, v2751); real2 v2826 = minus(v2751, v2750); real2 v2845 = reverse(minus(v2830, v2831)); real2 v2851 = plus(v2830, v2831); scatter(out, 3, 128, plus(v2850, v2851)); real2 v2864 = minus(v2850, v2851); scatter(out, 67, 128, timesminusplus(v2864, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2864), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2849 = minusplus(uminus(v2845), v2846); real2 v2847 = minusplus(v2845, v2846); scatter(out, 35, 128, timesminusplus(reverse(v2847), load(tbl, 506 * VECWIDTH + tbloffset), times(v2847, load(tbl, 507 * VECWIDTH + tbloffset)))); scatter(out, 99, 128, timesminusplus(reverse(v2849), load(tbl, 508 * VECWIDTH + tbloffset), times(v2849, load(tbl, 509 * VECWIDTH + tbloffset)))); real2 v2827 = minusplus(v2825, v2826); real2 v2829 = minusplus(uminus(v2825), v2826); real2 v2837 = timesminusplus(reverse(v2827), load(tbl, 502 * VECWIDTH + tbloffset), times(v2827, load(tbl, 503 * VECWIDTH + tbloffset))); real2 v2809 = minusplus(uminus(v2805), v2806); real2 v2807 = minusplus(v2805, v2806); real2 
v2817 = timesminusplus(reverse(v2807), load(tbl, 498 * VECWIDTH + tbloffset), times(v2807, load(tbl, 499 * VECWIDTH + tbloffset))); scatter(out, 19, 128, plus(v2817, v2837)); real2 v2870 = minus(v2817, v2837); scatter(out, 83, 128, timesminusplus(v2870, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2870), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2823 = timesminusplus(reverse(v2809), load(tbl, 500 * VECWIDTH + tbloffset), times(v2809, load(tbl, 501 * VECWIDTH + tbloffset))); real2 v2843 = timesminusplus(reverse(v2829), load(tbl, 504 * VECWIDTH + tbloffset), times(v2829, load(tbl, 505 * VECWIDTH + tbloffset))); scatter(out, 51, 128, plus(v2823, v2843)); real2 v2876 = minus(v2823, v2843); scatter(out, 115, 128, timesminusplus(v2876, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2876), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2787 = minusplus(v2785, v2786); real2 v2789 = minusplus(uminus(v2785), v2786); real2 v2803 = timesminusplus(reverse(v2789), load(tbl, 496 * VECWIDTH + tbloffset), times(v2789, load(tbl, 497 * VECWIDTH + tbloffset))); real2 v2727 = minusplus(v2725, v2726); real2 v2729 = minusplus(uminus(v2725), v2726); real2 v2743 = timesminusplus(reverse(v2729), load(tbl, 484 * VECWIDTH + tbloffset), times(v2729, load(tbl, 485 * VECWIDTH + tbloffset))); real2 v2914 = plus(v2743, v2783); real2 v2910 = minus(v2783, v2743); real2 v2749 = minusplus(uminus(v2745), v2746); real2 v2747 = minusplus(v2745, v2746); real2 v2763 = timesminusplus(reverse(v2749), load(tbl, 488 * VECWIDTH + tbloffset), times(v2749, load(tbl, 489 * VECWIDTH + tbloffset))); real2 v2909 = reverse(minus(v2763, v2803)); real2 v2915 = plus(v2763, v2803); scatter(out, 27, 128, plus(v2914, v2915)); real2 v2928 = minus(v2914, v2915); scatter(out, 91, 128, timesminusplus(v2928, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2928), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2913 = minusplus(uminus(v2909), v2910); scatter(out, 123, 128, timesminusplus(reverse(v2913), 
load(tbl, 516 * VECWIDTH + tbloffset), times(v2913, load(tbl, 517 * VECWIDTH + tbloffset)))); real2 v2911 = minusplus(v2909, v2910); scatter(out, 59, 128, timesminusplus(reverse(v2911), load(tbl, 514 * VECWIDTH + tbloffset), times(v2911, load(tbl, 515 * VECWIDTH + tbloffset)))); real2 v2737 = timesminusplus(reverse(v2727), load(tbl, 482 * VECWIDTH + tbloffset), times(v2727, load(tbl, 483 * VECWIDTH + tbloffset))); real2 v2888 = plus(v2737, v2777); real2 v2884 = minus(v2777, v2737); real2 v2797 = timesminusplus(reverse(v2787), load(tbl, 494 * VECWIDTH + tbloffset), times(v2787, load(tbl, 495 * VECWIDTH + tbloffset))); real2 v2757 = timesminusplus(reverse(v2747), load(tbl, 486 * VECWIDTH + tbloffset), times(v2747, load(tbl, 487 * VECWIDTH + tbloffset))); real2 v2889 = plus(v2757, v2797); real2 v2883 = reverse(minus(v2757, v2797)); scatter(out, 11, 128, plus(v2888, v2889)); real2 v2902 = minus(v2888, v2889); scatter(out, 75, 128, timesminusplus(v2902, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2902), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2887 = minusplus(uminus(v2883), v2884); scatter(out, 107, 128, timesminusplus(reverse(v2887), load(tbl, 512 * VECWIDTH + tbloffset), times(v2887, load(tbl, 513 * VECWIDTH + tbloffset)))); real2 v2885 = minusplus(v2883, v2884); scatter(out, 43, 128, timesminusplus(reverse(v2885), load(tbl, 510 * VECWIDTH + tbloffset), times(v2885, load(tbl, 511 * VECWIDTH + tbloffset)))); real2 v2669 = minusplus(uminus(v2665), v2666); real2 v2667 = minusplus(v2665, v2666); real2 v2707 = minusplus(v2705, v2706); real2 v2709 = minusplus(uminus(v2705), v2706); real2 v2717 = timesminusplus(reverse(v2707), load(tbl, 478 * VECWIDTH + tbloffset), times(v2707, load(tbl, 479 * VECWIDTH + tbloffset))); real2 v2627 = minusplus(v2625, v2626); real2 v2629 = minusplus(uminus(v2625), v2626); real2 v2637 = timesminusplus(reverse(v2627), load(tbl, 462 * VECWIDTH + tbloffset), times(v2627, load(tbl, 463 * VECWIDTH + tbloffset))); real2 v2961 = 
plus(v2637, v2717); real2 v2955 = reverse(minus(v2637, v2717)); real2 v2649 = minusplus(uminus(v2645), v2646); real2 v2647 = minusplus(v2645, v2646); real2 v2569 = minusplus(uminus(v2565), v2566); real2 v2567 = minusplus(v2565, v2566); real2 v2577 = timesminusplus(reverse(v2567), load(tbl, 450 * VECWIDTH + tbloffset), times(v2567, load(tbl, 451 * VECWIDTH + tbloffset))); real2 v2657 = timesminusplus(reverse(v2647), load(tbl, 466 * VECWIDTH + tbloffset), times(v2647, load(tbl, 467 * VECWIDTH + tbloffset))); real2 v2936 = minus(v2657, v2577); real2 v2940 = plus(v2577, v2657); real2 v2976 = minus(v2941, v2940); real2 v2980 = plus(v2940, v2941); real2 v2677 = timesminusplus(reverse(v2667), load(tbl, 470 * VECWIDTH + tbloffset), times(v2667, load(tbl, 471 * VECWIDTH + tbloffset))); real2 v2587 = minusplus(v2585, v2586); real2 v2589 = minusplus(uminus(v2585), v2586); real2 v2597 = timesminusplus(reverse(v2587), load(tbl, 454 * VECWIDTH + tbloffset), times(v2587, load(tbl, 455 * VECWIDTH + tbloffset))); real2 v2956 = minus(v2677, v2597); real2 v2960 = plus(v2597, v2677); real2 v2975 = reverse(minus(v2960, v2961)); real2 v2981 = plus(v2960, v2961); scatter(out, 7, 128, plus(v2980, v2981)); real2 v2994 = minus(v2980, v2981); scatter(out, 71, 128, timesminusplus(v2994, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2994), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2979 = minusplus(uminus(v2975), v2976); scatter(out, 103, 128, timesminusplus(reverse(v2979), load(tbl, 528 * VECWIDTH + tbloffset), times(v2979, load(tbl, 529 * VECWIDTH + tbloffset)))); real2 v2977 = minusplus(v2975, v2976); scatter(out, 39, 128, timesminusplus(reverse(v2977), load(tbl, 526 * VECWIDTH + tbloffset), times(v2977, load(tbl, 527 * VECWIDTH + tbloffset)))); real2 v2939 = minusplus(uminus(v2935), v2936); real2 v2937 = minusplus(v2935, v2936); real2 v2953 = timesminusplus(reverse(v2939), load(tbl, 520 * VECWIDTH + tbloffset), times(v2939, load(tbl, 521 * VECWIDTH + tbloffset))); real2 v2957 = 
minusplus(v2955, v2956); real2 v2959 = minusplus(uminus(v2955), v2956); real2 v2973 = timesminusplus(reverse(v2959), load(tbl, 524 * VECWIDTH + tbloffset), times(v2959, load(tbl, 525 * VECWIDTH + tbloffset))); scatter(out, 55, 128, plus(v2953, v2973)); real2 v3006 = minus(v2953, v2973); scatter(out, 119, 128, timesminusplus(v3006, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3006), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2947 = timesminusplus(reverse(v2937), load(tbl, 518 * VECWIDTH + tbloffset), times(v2937, load(tbl, 519 * VECWIDTH + tbloffset))); real2 v2967 = timesminusplus(reverse(v2957), load(tbl, 522 * VECWIDTH + tbloffset), times(v2957, load(tbl, 523 * VECWIDTH + tbloffset))); scatter(out, 23, 128, plus(v2947, v2967)); real2 v3000 = minus(v2947, v2967); scatter(out, 87, 128, timesminusplus(v3000, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3000), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2663 = timesminusplus(reverse(v2649), load(tbl, 468 * VECWIDTH + tbloffset), times(v2649, load(tbl, 469 * VECWIDTH + tbloffset))); real2 v2583 = timesminusplus(reverse(v2569), load(tbl, 452 * VECWIDTH + tbloffset), times(v2569, load(tbl, 453 * VECWIDTH + tbloffset))); real2 v3014 = minus(v2663, v2583); real2 v3018 = plus(v2583, v2663); real2 v3015 = minusplus(v3013, v3014); real2 v3017 = minusplus(uminus(v3013), v3014); real2 v2643 = timesminusplus(reverse(v2629), load(tbl, 464 * VECWIDTH + tbloffset), times(v2629, load(tbl, 465 * VECWIDTH + tbloffset))); real2 v2723 = timesminusplus(reverse(v2709), load(tbl, 480 * VECWIDTH + tbloffset), times(v2709, load(tbl, 481 * VECWIDTH + tbloffset))); real2 v3039 = plus(v2643, v2723); real2 v3033 = reverse(minus(v2643, v2723)); real2 v2683 = timesminusplus(reverse(v2669), load(tbl, 472 * VECWIDTH + tbloffset), times(v2669, load(tbl, 473 * VECWIDTH + tbloffset))); real2 v3031 = timesminusplus(reverse(v3017), load(tbl, 532 * VECWIDTH + tbloffset), times(v3017, load(tbl, 533 * VECWIDTH + tbloffset))); real2 
v2603 = timesminusplus(reverse(v2589), load(tbl, 456 * VECWIDTH + tbloffset), times(v2589, load(tbl, 457 * VECWIDTH + tbloffset))); real2 v3034 = minus(v2683, v2603); real2 v3038 = plus(v2603, v2683); real2 v3037 = minusplus(uminus(v3033), v3034); real2 v3035 = minusplus(v3033, v3034); real2 v3051 = timesminusplus(reverse(v3037), load(tbl, 536 * VECWIDTH + tbloffset), times(v3037, load(tbl, 537 * VECWIDTH + tbloffset))); scatter(out, 63, 128, plus(v3031, v3051)); real2 v3084 = minus(v3031, v3051); scatter(out, 127, 128, timesminusplus(v3084, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3084), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v3025 = timesminusplus(reverse(v3015), load(tbl, 530 * VECWIDTH + tbloffset), times(v3015, load(tbl, 531 * VECWIDTH + tbloffset))); real2 v3045 = timesminusplus(reverse(v3035), load(tbl, 534 * VECWIDTH + tbloffset), times(v3035, load(tbl, 535 * VECWIDTH + tbloffset))); scatter(out, 31, 128, plus(v3025, v3045)); real2 v3078 = minus(v3025, v3045); scatter(out, 95, 128, timesminusplus(v3078, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3078), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v3058 = plus(v3018, v3019); real2 v3054 = minus(v3019, v3018); real2 v3053 = reverse(minus(v3038, v3039)); real2 v3059 = plus(v3038, v3039); real2 v3055 = minusplus(v3053, v3054); scatter(out, 47, 128, timesminusplus(reverse(v3055), load(tbl, 538 * VECWIDTH + tbloffset), times(v3055, load(tbl, 539 * VECWIDTH + tbloffset)))); real2 v3057 = minusplus(uminus(v3053), v3054); scatter(out, 111, 128, timesminusplus(reverse(v3057), load(tbl, 540 * VECWIDTH + tbloffset), times(v3057, load(tbl, 541 * VECWIDTH + tbloffset)))); scatter(out, 15, 128, plus(v3058, v3059)); real2 v3072 = minus(v3058, v3059); scatter(out, 79, 128, timesminusplus(v3072, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3072), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v683 = timesminusplus(reverse(v673), load(tbl, 110 * VECWIDTH + tbloffset), times(v673, load(tbl, 
111 * VECWIDTH + tbloffset))); real2 v363 = timesminusplus(reverse(v353), load(tbl, 46 * VECWIDTH + tbloffset), times(v353, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v2105 = plus(v363, v683); real2 v2099 = reverse(minus(v363, v683)); real2 v283 = timesminusplus(reverse(v273), load(tbl, 30 * VECWIDTH + tbloffset), times(v273, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v723 = timesminusplus(reverse(v713), load(tbl, 118 * VECWIDTH + tbloffset), times(v713, load(tbl, 119 * VECWIDTH + tbloffset))); real2 v403 = timesminusplus(reverse(v393), load(tbl, 54 * VECWIDTH + tbloffset), times(v393, load(tbl, 55 * VECWIDTH + tbloffset))); real2 v603 = timesminusplus(reverse(v593), load(tbl, 94 * VECWIDTH + tbloffset), times(v593, load(tbl, 95 * VECWIDTH + tbloffset))); real2 v2180 = minus(v603, v283); real2 v2184 = plus(v283, v603); real2 v2145 = plus(v403, v723); real2 v2139 = reverse(minus(v403, v723)); real2 v543 = timesminusplus(reverse(v533), load(tbl, 82 * VECWIDTH + tbloffset), times(v533, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v383 = timesminusplus(reverse(v373), load(tbl, 50 * VECWIDTH + tbloffset), times(v373, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v703 = timesminusplus(reverse(v693), load(tbl, 114 * VECWIDTH + tbloffset), times(v693, load(tbl, 115 * VECWIDTH + tbloffset))); real2 v2125 = plus(v383, v703); real2 v2119 = reverse(minus(v383, v703)); real2 v223 = timesminusplus(reverse(v213), load(tbl, 18 * VECWIDTH + tbloffset), times(v213, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v2120 = minus(v543, v223); real2 v2124 = plus(v223, v543); real2 v443 = timesminusplus(reverse(v433), load(tbl, 62 * VECWIDTH + tbloffset), times(v433, load(tbl, 63 * VECWIDTH + tbloffset))); real2 v203 = timesminusplus(reverse(v193), load(tbl, 14 * VECWIDTH + tbloffset), times(v193, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v763 = timesminusplus(reverse(v753), load(tbl, 126 * VECWIDTH + tbloffset), times(v753, load(tbl, 127 * VECWIDTH + tbloffset))); real2 v2179 = 
reverse(minus(v443, v763)); real2 v2185 = plus(v443, v763); real2 v523 = timesminusplus(reverse(v513), load(tbl, 78 * VECWIDTH + tbloffset), times(v513, load(tbl, 79 * VECWIDTH + tbloffset))); real2 v2100 = minus(v523, v203); real2 v2104 = plus(v203, v523); real2 v2264 = plus(v2104, v2105); real2 v2260 = minus(v2105, v2104); real2 v643 = timesminusplus(reverse(v633), load(tbl, 102 * VECWIDTH + tbloffset), times(v633, load(tbl, 103 * VECWIDTH + tbloffset))); real2 v2265 = plus(v2184, v2185); real2 v2259 = reverse(minus(v2184, v2185)); real2 v563 = timesminusplus(reverse(v553), load(tbl, 86 * VECWIDTH + tbloffset), times(v553, load(tbl, 87 * VECWIDTH + tbloffset))); real2 v243 = timesminusplus(reverse(v233), load(tbl, 22 * VECWIDTH + tbloffset), times(v233, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v2144 = plus(v243, v563); real2 v2140 = minus(v563, v243); real2 v143 = timesminusplus(reverse(v133), load(tbl, 2 * VECWIDTH + tbloffset), times(v133, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v183 = timesminusplus(reverse(v173), load(tbl, 10 * VECWIDTH + tbloffset), times(v173, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v2084 = plus(v183, v503); real2 v2080 = minus(v503, v183); real2 v163 = timesminusplus(reverse(v153), load(tbl, 6 * VECWIDTH + tbloffset), times(v153, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v303 = timesminusplus(reverse(v293), load(tbl, 34 * VECWIDTH + tbloffset), times(v293, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v623 = timesminusplus(reverse(v613), load(tbl, 98 * VECWIDTH + tbloffset), times(v613, load(tbl, 99 * VECWIDTH + tbloffset))); real2 v2039 = reverse(minus(v303, v623)); real2 v2045 = plus(v303, v623); real2 v463 = timesminusplus(reverse(v453), load(tbl, 66 * VECWIDTH + tbloffset), times(v453, load(tbl, 67 * VECWIDTH + tbloffset))); real2 v2044 = plus(v143, v463); real2 v2040 = minus(v463, v143); real2 v2204 = plus(v2044, v2045); real2 v2200 = minus(v2045, v2044); real2 v323 = timesminusplus(reverse(v313), load(tbl, 38 * 
VECWIDTH + tbloffset), times(v313, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v2205 = plus(v2124, v2125); real2 v2199 = reverse(minus(v2124, v2125)); real2 v2280 = minus(v2205, v2204); real2 v2284 = plus(v2204, v2205); real2 v2225 = plus(v2144, v2145); real2 v2219 = reverse(minus(v2144, v2145)); real2 v2305 = plus(v2264, v2265); real2 v2299 = reverse(minus(v2264, v2265)); real2 v2240 = minus(v2085, v2084); real2 v2244 = plus(v2084, v2085); real2 v2279 = reverse(minus(v2244, v2245)); real2 v2285 = plus(v2244, v2245); real2 v2281 = minusplus(v2279, v2280); real2 v2283 = minusplus(uminus(v2279), v2280); real2 v2291 = timesminusplus(reverse(v2281), load(tbl, 406 * VECWIDTH + tbloffset), times(v2281, load(tbl, 407 * VECWIDTH + tbloffset))); real2 v483 = timesminusplus(reverse(v473), load(tbl, 70 * VECWIDTH + tbloffset), times(v473, load(tbl, 71 * VECWIDTH + tbloffset))); real2 v2060 = minus(v483, v163); real2 v2064 = plus(v163, v483); real2 v2065 = plus(v323, v643); real2 v2059 = reverse(minus(v323, v643)); real2 v2220 = minus(v2065, v2064); real2 v2224 = plus(v2064, v2065); real2 v2304 = plus(v2224, v2225); real2 v2300 = minus(v2225, v2224); real2 v2301 = minusplus(v2299, v2300); real2 v2303 = minusplus(uminus(v2299), v2300); real2 v2311 = timesminusplus(reverse(v2301), load(tbl, 410 * VECWIDTH + tbloffset), times(v2301, load(tbl, 411 * VECWIDTH + tbloffset))); scatter(out, 17, 128, plus(v2291, v2311)); real2 v2344 = minus(v2291, v2311); scatter(out, 81, 128, timesminusplus(v2344, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2344), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2297 = timesminusplus(reverse(v2283), load(tbl, 408 * VECWIDTH + tbloffset), times(v2283, load(tbl, 409 * VECWIDTH + tbloffset))); real2 v2317 = timesminusplus(reverse(v2303), load(tbl, 412 * VECWIDTH + tbloffset), times(v2303, load(tbl, 413 * VECWIDTH + tbloffset))); scatter(out, 49, 128, plus(v2297, v2317)); real2 v2350 = minus(v2297, v2317); scatter(out, 113, 128, 
timesminusplus(v2350, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2350), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2320 = minus(v2285, v2284); real2 v2324 = plus(v2284, v2285); real2 v2325 = plus(v2304, v2305); real2 v2319 = reverse(minus(v2304, v2305)); scatter(out, 1, 128, plus(v2324, v2325)); real2 v2338 = minus(v2324, v2325); scatter(out, 65, 128, timesminusplus(v2338, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2338), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2321 = minusplus(v2319, v2320); scatter(out, 33, 128, timesminusplus(reverse(v2321), load(tbl, 414 * VECWIDTH + tbloffset), times(v2321, load(tbl, 415 * VECWIDTH + tbloffset)))); real2 v2323 = minusplus(uminus(v2319), v2320); scatter(out, 97, 128, timesminusplus(reverse(v2323), load(tbl, 416 * VECWIDTH + tbloffset), times(v2323, load(tbl, 417 * VECWIDTH + tbloffset)))); real2 v2201 = minusplus(v2199, v2200); real2 v2203 = minusplus(uminus(v2199), v2200); real2 v2263 = minusplus(uminus(v2259), v2260); real2 v2261 = minusplus(v2259, v2260); real2 v2243 = minusplus(uminus(v2239), v2240); real2 v2241 = minusplus(v2239, v2240); real2 v2257 = timesminusplus(reverse(v2243), load(tbl, 400 * VECWIDTH + tbloffset), times(v2243, load(tbl, 401 * VECWIDTH + tbloffset))); real2 v2217 = timesminusplus(reverse(v2203), load(tbl, 392 * VECWIDTH + tbloffset), times(v2203, load(tbl, 393 * VECWIDTH + tbloffset))); real2 v2388 = plus(v2217, v2257); real2 v2384 = minus(v2257, v2217); real2 v2277 = timesminusplus(reverse(v2263), load(tbl, 404 * VECWIDTH + tbloffset), times(v2263, load(tbl, 405 * VECWIDTH + tbloffset))); real2 v2221 = minusplus(v2219, v2220); real2 v2223 = minusplus(uminus(v2219), v2220); real2 v2237 = timesminusplus(reverse(v2223), load(tbl, 396 * VECWIDTH + tbloffset), times(v2223, load(tbl, 397 * VECWIDTH + tbloffset))); real2 v2389 = plus(v2237, v2277); real2 v2383 = reverse(minus(v2237, v2277)); scatter(out, 25, 128, plus(v2388, v2389)); real2 v2402 = minus(v2388, v2389); 
scatter(out, 89, 128, timesminusplus(v2402, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2402), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2385 = minusplus(v2383, v2384); real2 v2387 = minusplus(uminus(v2383), v2384); scatter(out, 121, 128, timesminusplus(reverse(v2387), load(tbl, 424 * VECWIDTH + tbloffset), times(v2387, load(tbl, 425 * VECWIDTH + tbloffset)))); scatter(out, 57, 128, timesminusplus(reverse(v2385), load(tbl, 422 * VECWIDTH + tbloffset), times(v2385, load(tbl, 423 * VECWIDTH + tbloffset)))); real2 v2251 = timesminusplus(reverse(v2241), load(tbl, 398 * VECWIDTH + tbloffset), times(v2241, load(tbl, 399 * VECWIDTH + tbloffset))); real2 v2211 = timesminusplus(reverse(v2201), load(tbl, 390 * VECWIDTH + tbloffset), times(v2201, load(tbl, 391 * VECWIDTH + tbloffset))); real2 v2358 = minus(v2251, v2211); real2 v2362 = plus(v2211, v2251); real2 v2271 = timesminusplus(reverse(v2261), load(tbl, 402 * VECWIDTH + tbloffset), times(v2261, load(tbl, 403 * VECWIDTH + tbloffset))); real2 v2231 = timesminusplus(reverse(v2221), load(tbl, 394 * VECWIDTH + tbloffset), times(v2221, load(tbl, 395 * VECWIDTH + tbloffset))); real2 v2357 = reverse(minus(v2231, v2271)); real2 v2363 = plus(v2231, v2271); scatter(out, 9, 128, plus(v2362, v2363)); real2 v2376 = minus(v2362, v2363); scatter(out, 73, 128, timesminusplus(v2376, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2376), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2361 = minusplus(uminus(v2357), v2358); scatter(out, 105, 128, timesminusplus(reverse(v2361), load(tbl, 420 * VECWIDTH + tbloffset), times(v2361, load(tbl, 421 * VECWIDTH + tbloffset)))); real2 v2359 = minusplus(v2357, v2358); scatter(out, 41, 128, timesminusplus(reverse(v2359), load(tbl, 418 * VECWIDTH + tbloffset), times(v2359, load(tbl, 419 * VECWIDTH + tbloffset)))); real2 v2121 = minusplus(v2119, v2120); real2 v2123 = minusplus(uminus(v2119), v2120); real2 v2083 = minusplus(uminus(v2079), v2080); real2 v2081 = minusplus(v2079, v2080); 
real2 v2091 = timesminusplus(reverse(v2081), load(tbl, 366 * VECWIDTH + tbloffset), times(v2081, load(tbl, 367 * VECWIDTH + tbloffset))); real2 v2043 = minusplus(uminus(v2039), v2040); real2 v2041 = minusplus(v2039, v2040); real2 v2051 = timesminusplus(reverse(v2041), load(tbl, 358 * VECWIDTH + tbloffset), times(v2041, load(tbl, 359 * VECWIDTH + tbloffset))); real2 v2131 = timesminusplus(reverse(v2121), load(tbl, 374 * VECWIDTH + tbloffset), times(v2121, load(tbl, 375 * VECWIDTH + tbloffset))); real2 v2163 = minusplus(uminus(v2159), v2160); real2 v2161 = minusplus(v2159, v2160); real2 v2171 = timesminusplus(reverse(v2161), load(tbl, 382 * VECWIDTH + tbloffset), times(v2161, load(tbl, 383 * VECWIDTH + tbloffset))); real2 v2409 = reverse(minus(v2091, v2171)); real2 v2415 = plus(v2091, v2171); real2 v2410 = minus(v2131, v2051); real2 v2414 = plus(v2051, v2131); real2 v2454 = plus(v2414, v2415); real2 v2450 = minus(v2415, v2414); real2 v2181 = minusplus(v2179, v2180); real2 v2183 = minusplus(uminus(v2179), v2180); real2 v2191 = timesminusplus(reverse(v2181), load(tbl, 386 * VECWIDTH + tbloffset), times(v2181, load(tbl, 387 * VECWIDTH + tbloffset))); real2 v2103 = minusplus(uminus(v2099), v2100); real2 v2101 = minusplus(v2099, v2100); real2 v2111 = timesminusplus(reverse(v2101), load(tbl, 370 * VECWIDTH + tbloffset), times(v2101, load(tbl, 371 * VECWIDTH + tbloffset))); real2 v2435 = plus(v2111, v2191); real2 v2429 = reverse(minus(v2111, v2191)); real2 v2141 = minusplus(v2139, v2140); real2 v2143 = minusplus(uminus(v2139), v2140); real2 v2151 = timesminusplus(reverse(v2141), load(tbl, 378 * VECWIDTH + tbloffset), times(v2141, load(tbl, 379 * VECWIDTH + tbloffset))); real2 v2063 = minusplus(uminus(v2059), v2060); real2 v2061 = minusplus(v2059, v2060); real2 v2071 = timesminusplus(reverse(v2061), load(tbl, 362 * VECWIDTH + tbloffset), times(v2061, load(tbl, 363 * VECWIDTH + tbloffset))); real2 v2434 = plus(v2071, v2151); real2 v2430 = minus(v2151, v2071); real2 v2455 = 
plus(v2434, v2435); real2 v2449 = reverse(minus(v2434, v2435)); scatter(out, 5, 128, plus(v2454, v2455)); real2 v2468 = minus(v2454, v2455); scatter(out, 69, 128, timesminusplus(v2468, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2468), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2451 = minusplus(v2449, v2450); real2 v2453 = minusplus(uminus(v2449), v2450); scatter(out, 101, 128, timesminusplus(reverse(v2453), load(tbl, 436 * VECWIDTH + tbloffset), times(v2453, load(tbl, 437 * VECWIDTH + tbloffset)))); scatter(out, 37, 128, timesminusplus(reverse(v2451), load(tbl, 434 * VECWIDTH + tbloffset), times(v2451, load(tbl, 435 * VECWIDTH + tbloffset)))); real2 v2411 = minusplus(v2409, v2410); real2 v2413 = minusplus(uminus(v2409), v2410); real2 v2433 = minusplus(uminus(v2429), v2430); real2 v2431 = minusplus(v2429, v2430); real2 v2421 = timesminusplus(reverse(v2411), load(tbl, 426 * VECWIDTH + tbloffset), times(v2411, load(tbl, 427 * VECWIDTH + tbloffset))); real2 v2441 = timesminusplus(reverse(v2431), load(tbl, 430 * VECWIDTH + tbloffset), times(v2431, load(tbl, 431 * VECWIDTH + tbloffset))); scatter(out, 21, 128, plus(v2421, v2441)); real2 v2474 = minus(v2421, v2441); scatter(out, 85, 128, timesminusplus(v2474, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2474), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2427 = timesminusplus(reverse(v2413), load(tbl, 428 * VECWIDTH + tbloffset), times(v2413, load(tbl, 429 * VECWIDTH + tbloffset))); real2 v2447 = timesminusplus(reverse(v2433), load(tbl, 432 * VECWIDTH + tbloffset), times(v2433, load(tbl, 433 * VECWIDTH + tbloffset))); scatter(out, 53, 128, plus(v2427, v2447)); real2 v2480 = minus(v2427, v2447); scatter(out, 117, 128, timesminusplus(v2480, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2480), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2057 = timesminusplus(reverse(v2043), load(tbl, 360 * VECWIDTH + tbloffset), times(v2043, load(tbl, 361 * VECWIDTH + tbloffset))); real2 v2097 = 
timesminusplus(reverse(v2083), load(tbl, 368 * VECWIDTH + tbloffset), times(v2083, load(tbl, 369 * VECWIDTH + tbloffset))); real2 v2157 = timesminusplus(reverse(v2143), load(tbl, 380 * VECWIDTH + tbloffset), times(v2143, load(tbl, 381 * VECWIDTH + tbloffset))); real2 v2197 = timesminusplus(reverse(v2183), load(tbl, 388 * VECWIDTH + tbloffset), times(v2183, load(tbl, 389 * VECWIDTH + tbloffset))); real2 v2117 = timesminusplus(reverse(v2103), load(tbl, 372 * VECWIDTH + tbloffset), times(v2103, load(tbl, 373 * VECWIDTH + tbloffset))); real2 v2507 = reverse(minus(v2117, v2197)); real2 v2513 = plus(v2117, v2197); real2 v2137 = timesminusplus(reverse(v2123), load(tbl, 376 * VECWIDTH + tbloffset), times(v2123, load(tbl, 377 * VECWIDTH + tbloffset))); real2 v2488 = minus(v2137, v2057); real2 v2492 = plus(v2057, v2137); real2 v2177 = timesminusplus(reverse(v2163), load(tbl, 384 * VECWIDTH + tbloffset), times(v2163, load(tbl, 385 * VECWIDTH + tbloffset))); real2 v2493 = plus(v2097, v2177); real2 v2487 = reverse(minus(v2097, v2177)); real2 v2532 = plus(v2492, v2493); real2 v2528 = minus(v2493, v2492); real2 v2077 = timesminusplus(reverse(v2063), load(tbl, 364 * VECWIDTH + tbloffset), times(v2063, load(tbl, 365 * VECWIDTH + tbloffset))); real2 v2512 = plus(v2077, v2157); real2 v2508 = minus(v2157, v2077); real2 v2527 = reverse(minus(v2512, v2513)); real2 v2533 = plus(v2512, v2513); real2 v2529 = minusplus(v2527, v2528); real2 v2531 = minusplus(uminus(v2527), v2528); scatter(out, 109, 128, timesminusplus(reverse(v2531), load(tbl, 448 * VECWIDTH + tbloffset), times(v2531, load(tbl, 449 * VECWIDTH + tbloffset)))); scatter(out, 45, 128, timesminusplus(reverse(v2529), load(tbl, 446 * VECWIDTH + tbloffset), times(v2529, load(tbl, 447 * VECWIDTH + tbloffset)))); scatter(out, 13, 128, plus(v2532, v2533)); real2 v2546 = minus(v2532, v2533); scatter(out, 77, 128, timesminusplus(v2546, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2546), load(tbl, 1 * VECWIDTH + tbloffset)))); 
real2 v2509 = minusplus(v2507, v2508); real2 v2511 = minusplus(uminus(v2507), v2508); real2 v2491 = minusplus(uminus(v2487), v2488); real2 v2489 = minusplus(v2487, v2488); real2 v2499 = timesminusplus(reverse(v2489), load(tbl, 438 * VECWIDTH + tbloffset), times(v2489, load(tbl, 439 * VECWIDTH + tbloffset))); real2 v2519 = timesminusplus(reverse(v2509), load(tbl, 442 * VECWIDTH + tbloffset), times(v2509, load(tbl, 443 * VECWIDTH + tbloffset))); scatter(out, 29, 128, plus(v2499, v2519)); real2 v2552 = minus(v2499, v2519); scatter(out, 93, 128, timesminusplus(v2552, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2552), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2505 = timesminusplus(reverse(v2491), load(tbl, 440 * VECWIDTH + tbloffset), times(v2491, load(tbl, 441 * VECWIDTH + tbloffset))); real2 v2525 = timesminusplus(reverse(v2511), load(tbl, 444 * VECWIDTH + tbloffset), times(v2511, load(tbl, 445 * VECWIDTH + tbloffset))); scatter(out, 61, 128, plus(v2505, v2525)); real2 v2558 = minus(v2505, v2525); scatter(out, 125, 128, timesminusplus(v2558, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2558), load(tbl, 1 * VECWIDTH + tbloffset)))); // Pres : 76263 } } ALIGNED(8192) void tbut128b_%CONFIG%_%ISA%(real *RESTRICT out0, uint32_t *q, const real *RESTRICT in0, const int inShift, const real *RESTRICT tbl, const int K) { const int k = 1 << (inShift - LOG2VECWIDTH); int i; #pragma omp parallel for for(i=0;i < k;i++) { int i0 = i << LOG2VECWIDTH; real *out = out0 + q[i]; const real *in = in0 + i0*2; const int tbloffset = K * i0; // Pres : 148586 real2 v56 = load(in, 54 << inShift); real2 v120 = load(in, 118 << inShift); real2 v571 = reverse(minus(v120, v56)); real2 v577 = plus(v56, v120); real2 v24 = load(in, 22 << inShift); real2 v88 = load(in, 86 << inShift); real2 v576 = plus(v24, v88); real2 v572 = minus(v88, v24); real2 v573 = minusplus(v571, v572); real2 v575 = minusplus(uminus(v571), v572); real2 v589 = timesminusplus(reverse(v575), load(tbl, 92 * 
VECWIDTH + tbloffset), times(v575, load(tbl, 93 * VECWIDTH + tbloffset))); real2 v583 = timesminusplus(reverse(v573), load(tbl, 90 * VECWIDTH + tbloffset), times(v573, load(tbl, 91 * VECWIDTH + tbloffset))); real2 v897 = plus(v576, v577); real2 v891 = reverse(minus(v577, v576)); real2 v8 = load(in, 6 << inShift); real2 v72 = load(in, 70 << inShift); real2 v252 = minus(v72, v8); real2 v256 = plus(v8, v72); real2 v104 = load(in, 102 << inShift); real2 v40 = load(in, 38 << inShift); real2 v251 = reverse(minus(v104, v40)); real2 v257 = plus(v40, v104); real2 v255 = minusplus(uminus(v251), v252); real2 v253 = minusplus(v251, v252); real2 v263 = timesminusplus(reverse(v253), load(tbl, 26 * VECWIDTH + tbloffset), times(v253, load(tbl, 27 * VECWIDTH + tbloffset))); real2 v896 = plus(v256, v257); real2 v892 = minus(v257, v256); real2 v895 = minusplus(uminus(v891), v892); real2 v893 = minusplus(v891, v892); real2 v909 = timesminusplus(reverse(v895), load(tbl, 156 * VECWIDTH + tbloffset), times(v895, load(tbl, 157 * VECWIDTH + tbloffset))); real2 v903 = timesminusplus(reverse(v893), load(tbl, 154 * VECWIDTH + tbloffset), times(v893, load(tbl, 155 * VECWIDTH + tbloffset))); real2 v269 = timesminusplus(reverse(v255), load(tbl, 28 * VECWIDTH + tbloffset), times(v255, load(tbl, 29 * VECWIDTH + tbloffset))); real2 v1216 = plus(v896, v897); real2 v1212 = minus(v897, v896); real2 v2160 = minus(v583, v263); real2 v2164 = plus(v263, v583); real2 v2686 = minus(v589, v269); real2 v2690 = plus(v269, v589); real2 v96 = load(in, 94 << inShift); real2 v32 = load(in, 30 << inShift); real2 v736 = plus(v32, v96); real2 v732 = minus(v96, v32); real2 v64 = load(in, 62 << inShift); real2 v128 = load(in, 126 << inShift); real2 v737 = plus(v64, v128); real2 v731 = reverse(minus(v128, v64)); real2 v1057 = plus(v736, v737); real2 v1051 = reverse(minus(v737, v736)); real2 v733 = minusplus(v731, v732); real2 v735 = minusplus(uminus(v731), v732); real2 v749 = timesminusplus(reverse(v735), load(tbl, 124 
* VECWIDTH + tbloffset), times(v735, load(tbl, 125 * VECWIDTH + tbloffset))); real2 v743 = timesminusplus(reverse(v733), load(tbl, 122 * VECWIDTH + tbloffset), times(v733, load(tbl, 123 * VECWIDTH + tbloffset))); real2 v16 = load(in, 14 << inShift); real2 v80 = load(in, 78 << inShift); real2 v412 = minus(v80, v16); real2 v416 = plus(v16, v80); real2 v112 = load(in, 110 << inShift); real2 v48 = load(in, 46 << inShift); real2 v417 = plus(v48, v112); real2 v411 = reverse(minus(v112, v48)); real2 v1056 = plus(v416, v417); real2 v1052 = minus(v417, v416); real2 v1055 = minusplus(uminus(v1051), v1052); real2 v1053 = minusplus(v1051, v1052); real2 v1063 = timesminusplus(reverse(v1053), load(tbl, 186 * VECWIDTH + tbloffset), times(v1053, load(tbl, 187 * VECWIDTH + tbloffset))); real2 v1665 = plus(v903, v1063); real2 v1659 = reverse(minus(v1063, v903)); real2 v1069 = timesminusplus(reverse(v1055), load(tbl, 188 * VECWIDTH + tbloffset), times(v1055, load(tbl, 189 * VECWIDTH + tbloffset))); real2 v1869 = reverse(minus(v1069, v909)); real2 v1875 = plus(v909, v1069); real2 v413 = minusplus(v411, v412); real2 v415 = minusplus(uminus(v411), v412); real2 v429 = timesminusplus(reverse(v415), load(tbl, 60 * VECWIDTH + tbloffset), times(v415, load(tbl, 61 * VECWIDTH + tbloffset))); real2 v1217 = plus(v1056, v1057); real2 v1211 = reverse(minus(v1057, v1056)); real2 v1297 = plus(v1216, v1217); real2 v1291 = reverse(minus(v1217, v1216)); real2 v2691 = plus(v429, v749); real2 v2685 = reverse(minus(v749, v429)); real2 v2765 = reverse(minus(v2691, v2690)); real2 v2771 = plus(v2690, v2691); real2 v2689 = minusplus(uminus(v2685), v2686); real2 v2687 = minusplus(v2685, v2686); real2 v2703 = timesminusplus(reverse(v2689), load(tbl, 476 * VECWIDTH + tbloffset), times(v2689, load(tbl, 477 * VECWIDTH + tbloffset))); real2 v2697 = timesminusplus(reverse(v2687), load(tbl, 474 * VECWIDTH + tbloffset), times(v2687, load(tbl, 475 * VECWIDTH + tbloffset))); real2 v1215 = minusplus(uminus(v1211), 
v1212); real2 v1213 = minusplus(v1211, v1212); real2 v1223 = timesminusplus(reverse(v1213), load(tbl, 218 * VECWIDTH + tbloffset), times(v1213, load(tbl, 219 * VECWIDTH + tbloffset))); real2 v1229 = timesminusplus(reverse(v1215), load(tbl, 220 * VECWIDTH + tbloffset), times(v1215, load(tbl, 221 * VECWIDTH + tbloffset))); real2 v423 = timesminusplus(reverse(v413), load(tbl, 58 * VECWIDTH + tbloffset), times(v413, load(tbl, 59 * VECWIDTH + tbloffset))); real2 v2165 = plus(v423, v743); real2 v2159 = reverse(minus(v743, v423)); real2 v2245 = plus(v2164, v2165); real2 v2239 = reverse(minus(v2165, v2164)); real2 v44 = load(in, 42 << inShift); real2 v108 = load(in, 106 << inShift); real2 v331 = reverse(minus(v108, v44)); real2 v337 = plus(v44, v108); real2 v76 = load(in, 74 << inShift); real2 v12 = load(in, 10 << inShift); real2 v336 = plus(v12, v76); real2 v332 = minus(v76, v12); real2 v976 = plus(v336, v337); real2 v972 = minus(v337, v336); real2 v335 = minusplus(uminus(v331), v332); real2 v333 = minusplus(v331, v332); real2 v343 = timesminusplus(reverse(v333), load(tbl, 42 * VECWIDTH + tbloffset), times(v333, load(tbl, 43 * VECWIDTH + tbloffset))); real2 v349 = timesminusplus(reverse(v335), load(tbl, 44 * VECWIDTH + tbloffset), times(v335, load(tbl, 45 * VECWIDTH + tbloffset))); real2 v124 = load(in, 122 << inShift); real2 v60 = load(in, 58 << inShift); real2 v651 = reverse(minus(v124, v60)); real2 v657 = plus(v60, v124); real2 v28 = load(in, 26 << inShift); real2 v92 = load(in, 90 << inShift); real2 v652 = minus(v92, v28); real2 v656 = plus(v28, v92); real2 v977 = plus(v656, v657); real2 v971 = reverse(minus(v657, v656)); real2 v973 = minusplus(v971, v972); real2 v975 = minusplus(uminus(v971), v972); real2 v983 = timesminusplus(reverse(v973), load(tbl, 170 * VECWIDTH + tbloffset), times(v973, load(tbl, 171 * VECWIDTH + tbloffset))); real2 v1131 = reverse(minus(v977, v976)); real2 v1137 = plus(v976, v977); real2 v655 = minusplus(uminus(v651), v652); real2 v653 = 
minusplus(v651, v652); real2 v669 = timesminusplus(reverse(v655), load(tbl, 108 * VECWIDTH + tbloffset), times(v655, load(tbl, 109 * VECWIDTH + tbloffset))); real2 v663 = timesminusplus(reverse(v653), load(tbl, 106 * VECWIDTH + tbloffset), times(v653, load(tbl, 107 * VECWIDTH + tbloffset))); real2 v2079 = reverse(minus(v663, v343)); real2 v2085 = plus(v343, v663); real2 v2605 = reverse(minus(v669, v349)); real2 v2611 = plus(v349, v669); real2 v989 = timesminusplus(reverse(v975), load(tbl, 172 * VECWIDTH + tbloffset), times(v975, load(tbl, 173 * VECWIDTH + tbloffset))); real2 v20 = load(in, 18 << inShift); real2 v84 = load(in, 82 << inShift); real2 v496 = plus(v20, v84); real2 v492 = minus(v84, v20); real2 v52 = load(in, 50 << inShift); real2 v116 = load(in, 114 << inShift); real2 v491 = reverse(minus(v116, v52)); real2 v497 = plus(v52, v116); real2 v817 = plus(v496, v497); real2 v811 = reverse(minus(v497, v496)); real2 v493 = minusplus(v491, v492); real2 v495 = minusplus(uminus(v491), v492); real2 v509 = timesminusplus(reverse(v495), load(tbl, 76 * VECWIDTH + tbloffset), times(v495, load(tbl, 77 * VECWIDTH + tbloffset))); real2 v503 = timesminusplus(reverse(v493), load(tbl, 74 * VECWIDTH + tbloffset), times(v493, load(tbl, 75 * VECWIDTH + tbloffset))); real2 v36 = load(in, 34 << inShift); real2 v100 = load(in, 98 << inShift); real2 v171 = reverse(minus(v100, v36)); real2 v177 = plus(v36, v100); real2 v68 = load(in, 66 << inShift); real2 v4 = load(in, 2 << inShift); real2 v176 = plus(v4, v68); real2 v172 = minus(v68, v4); real2 v816 = plus(v176, v177); real2 v812 = minus(v177, v176); real2 v1136 = plus(v816, v817); real2 v1132 = minus(v817, v816); real2 v1133 = minusplus(v1131, v1132); real2 v1135 = minusplus(uminus(v1131), v1132); real2 v1149 = timesminusplus(reverse(v1135), load(tbl, 204 * VECWIDTH + tbloffset), times(v1135, load(tbl, 205 * VECWIDTH + tbloffset))); real2 v1296 = plus(v1136, v1137); real2 v1292 = minus(v1137, v1136); real2 v1295 = 
minusplus(uminus(v1291), v1292); real2 v1293 = minusplus(v1291, v1292); real2 v1303 = timesminusplus(reverse(v1293), load(tbl, 234 * VECWIDTH + tbloffset), times(v1293, load(tbl, 235 * VECWIDTH + tbloffset))); real2 v1331 = reverse(minus(v1297, v1296)); real2 v1337 = plus(v1296, v1297); real2 v173 = minusplus(v171, v172); real2 v175 = minusplus(uminus(v171), v172); real2 v189 = timesminusplus(reverse(v175), load(tbl, 12 * VECWIDTH + tbloffset), times(v175, load(tbl, 13 * VECWIDTH + tbloffset))); real2 v1309 = timesminusplus(reverse(v1295), load(tbl, 236 * VECWIDTH + tbloffset), times(v1295, load(tbl, 237 * VECWIDTH + tbloffset))); real2 v815 = minusplus(uminus(v811), v812); real2 v813 = minusplus(v811, v812); real2 v1143 = timesminusplus(reverse(v1133), load(tbl, 202 * VECWIDTH + tbloffset), times(v1133, load(tbl, 203 * VECWIDTH + tbloffset))); real2 v1541 = reverse(minus(v1229, v1149)); real2 v1547 = plus(v1149, v1229); real2 v2610 = plus(v189, v509); real2 v2606 = minus(v509, v189); real2 v2770 = plus(v2610, v2611); real2 v2766 = minus(v2611, v2610); real2 v823 = timesminusplus(reverse(v813), load(tbl, 138 * VECWIDTH + tbloffset), times(v813, load(tbl, 139 * VECWIDTH + tbloffset))); real2 v829 = timesminusplus(reverse(v815), load(tbl, 140 * VECWIDTH + tbloffset), times(v815, load(tbl, 141 * VECWIDTH + tbloffset))); real2 v2811 = plus(v2770, v2771); real2 v2805 = reverse(minus(v2771, v2770)); real2 v2767 = minusplus(v2765, v2766); real2 v2769 = minusplus(uminus(v2765), v2766); real2 v2607 = minusplus(v2605, v2606); real2 v2609 = minusplus(uminus(v2605), v2606); real2 v2617 = timesminusplus(reverse(v2607), load(tbl, 458 * VECWIDTH + tbloffset), times(v2607, load(tbl, 459 * VECWIDTH + tbloffset))); real2 v2623 = timesminusplus(reverse(v2609), load(tbl, 460 * VECWIDTH + tbloffset), times(v2609, load(tbl, 461 * VECWIDTH + tbloffset))); real2 v3013 = reverse(minus(v2703, v2623)); real2 v3019 = plus(v2623, v2703); real2 v2783 = timesminusplus(reverse(v2769), load(tbl, 
492 * VECWIDTH + tbloffset), times(v2769, load(tbl, 493 * VECWIDTH + tbloffset))); real2 v2941 = plus(v2617, v2697); real2 v2935 = reverse(minus(v2697, v2617)); real2 v2777 = timesminusplus(reverse(v2767), load(tbl, 490 * VECWIDTH + tbloffset), times(v2767, load(tbl, 491 * VECWIDTH + tbloffset))); real2 v1660 = minus(v983, v823); real2 v1664 = plus(v823, v983); real2 v1874 = plus(v829, v989); real2 v1870 = minus(v989, v829); real2 v1909 = reverse(minus(v1875, v1874)); real2 v1915 = plus(v1874, v1875); real2 v1663 = minusplus(uminus(v1659), v1660); real2 v1661 = minusplus(v1659, v1660); real2 v1677 = timesminusplus(reverse(v1663), load(tbl, 296 * VECWIDTH + tbloffset), times(v1663, load(tbl, 297 * VECWIDTH + tbloffset))); real2 v1873 = minusplus(uminus(v1869), v1870); real2 v1871 = minusplus(v1869, v1870); real2 v1887 = timesminusplus(reverse(v1873), load(tbl, 332 * VECWIDTH + tbloffset), times(v1873, load(tbl, 333 * VECWIDTH + tbloffset))); real2 v1705 = plus(v1664, v1665); real2 v1699 = reverse(minus(v1665, v1664)); real2 v1671 = timesminusplus(reverse(v1661), load(tbl, 294 * VECWIDTH + tbloffset), times(v1661, load(tbl, 295 * VECWIDTH + tbloffset))); real2 v1881 = timesminusplus(reverse(v1871), load(tbl, 330 * VECWIDTH + tbloffset), times(v1871, load(tbl, 331 * VECWIDTH + tbloffset))); real2 v1469 = plus(v1143, v1223); real2 v1463 = reverse(minus(v1223, v1143)); real2 v54 = load(in, 52 << inShift); real2 v118 = load(in, 116 << inShift); real2 v537 = plus(v54, v118); real2 v531 = reverse(minus(v118, v54)); real2 v86 = load(in, 84 << inShift); real2 v22 = load(in, 20 << inShift); real2 v536 = plus(v22, v86); real2 v532 = minus(v86, v22); real2 v851 = reverse(minus(v537, v536)); real2 v857 = plus(v536, v537); real2 v533 = minusplus(v531, v532); real2 v535 = minusplus(uminus(v531), v532); real2 v549 = timesminusplus(reverse(v535), load(tbl, 84 * VECWIDTH + tbloffset), times(v535, load(tbl, 85 * VECWIDTH + tbloffset))); real2 v102 = load(in, 100 << inShift); real2 v38 
= load(in, 36 << inShift); real2 v217 = plus(v38, v102); real2 v211 = reverse(minus(v102, v38)); real2 v70 = load(in, 68 << inShift); real2 v6 = load(in, 4 << inShift); real2 v216 = plus(v6, v70); real2 v212 = minus(v70, v6); real2 v213 = minusplus(v211, v212); real2 v215 = minusplus(uminus(v211), v212); real2 v229 = timesminusplus(reverse(v215), load(tbl, 20 * VECWIDTH + tbloffset), times(v215, load(tbl, 21 * VECWIDTH + tbloffset))); real2 v2646 = minus(v549, v229); real2 v2650 = plus(v229, v549); real2 v856 = plus(v216, v217); real2 v852 = minus(v217, v216); real2 v853 = minusplus(v851, v852); real2 v855 = minusplus(uminus(v851), v852); real2 v863 = timesminusplus(reverse(v853), load(tbl, 146 * VECWIDTH + tbloffset), times(v853, load(tbl, 147 * VECWIDTH + tbloffset))); real2 v869 = timesminusplus(reverse(v855), load(tbl, 148 * VECWIDTH + tbloffset), times(v855, load(tbl, 149 * VECWIDTH + tbloffset))); real2 v1176 = plus(v856, v857); real2 v1172 = minus(v857, v856); real2 v110 = load(in, 108 << inShift); real2 v46 = load(in, 44 << inShift); real2 v377 = plus(v46, v110); real2 v371 = reverse(minus(v110, v46)); real2 v78 = load(in, 76 << inShift); real2 v14 = load(in, 12 << inShift); real2 v372 = minus(v78, v14); real2 v376 = plus(v14, v78); real2 v1012 = minus(v377, v376); real2 v1016 = plus(v376, v377); real2 v373 = minusplus(v371, v372); real2 v375 = minusplus(uminus(v371), v372); real2 v389 = timesminusplus(reverse(v375), load(tbl, 52 * VECWIDTH + tbloffset), times(v375, load(tbl, 53 * VECWIDTH + tbloffset))); real2 v30 = load(in, 28 << inShift); real2 v94 = load(in, 92 << inShift); real2 v696 = plus(v30, v94); real2 v692 = minus(v94, v30); real2 v62 = load(in, 60 << inShift); real2 v126 = load(in, 124 << inShift); real2 v697 = plus(v62, v126); real2 v691 = reverse(minus(v126, v62)); real2 v1017 = plus(v696, v697); real2 v1011 = reverse(minus(v697, v696)); real2 v1171 = reverse(minus(v1017, v1016)); real2 v1177 = plus(v1016, v1017); real2 v1013 = 
minusplus(v1011, v1012); real2 v1015 = minusplus(uminus(v1011), v1012); real2 v1175 = minusplus(uminus(v1171), v1172); real2 v1173 = minusplus(v1171, v1172); real2 v1183 = timesminusplus(reverse(v1173), load(tbl, 210 * VECWIDTH + tbloffset), times(v1173, load(tbl, 211 * VECWIDTH + tbloffset))); real2 v1189 = timesminusplus(reverse(v1175), load(tbl, 212 * VECWIDTH + tbloffset), times(v1175, load(tbl, 213 * VECWIDTH + tbloffset))); real2 v1029 = timesminusplus(reverse(v1015), load(tbl, 180 * VECWIDTH + tbloffset), times(v1015, load(tbl, 181 * VECWIDTH + tbloffset))); real2 v1023 = timesminusplus(reverse(v1013), load(tbl, 178 * VECWIDTH + tbloffset), times(v1013, load(tbl, 179 * VECWIDTH + tbloffset))); real2 v1625 = plus(v863, v1023); real2 v1619 = reverse(minus(v1023, v863)); real2 v1835 = plus(v869, v1029); real2 v1829 = reverse(minus(v1029, v869)); real2 v693 = minusplus(v691, v692); real2 v695 = minusplus(uminus(v691), v692); real2 v709 = timesminusplus(reverse(v695), load(tbl, 116 * VECWIDTH + tbloffset), times(v695, load(tbl, 117 * VECWIDTH + tbloffset))); real2 v2645 = reverse(minus(v709, v389)); real2 v2651 = plus(v389, v709); real2 v1257 = plus(v1176, v1177); real2 v1251 = reverse(minus(v1177, v1176)); real2 v2731 = plus(v2650, v2651); real2 v2725 = reverse(minus(v2651, v2650)); real2 v114 = load(in, 112 << inShift); real2 v50 = load(in, 48 << inShift); real2 v457 = plus(v50, v114); real2 v451 = reverse(minus(v114, v50)); real2 v18 = load(in, 16 << inShift); real2 v82 = load(in, 80 << inShift); real2 v456 = plus(v18, v82); real2 v452 = minus(v82, v18); real2 v771 = reverse(minus(v457, v456)); real2 v777 = plus(v456, v457); real2 v453 = minusplus(v451, v452); real2 v455 = minusplus(uminus(v451), v452); real2 v469 = timesminusplus(reverse(v455), load(tbl, 68 * VECWIDTH + tbloffset), times(v455, load(tbl, 69 * VECWIDTH + tbloffset))); real2 v66 = load(in, 64 << inShift); real2 v2 = load(in, 0 << inShift); real2 v132 = minus(v66, v2); real2 v136 = plus(v2, v66); 
real2 v98 = load(in, 96 << inShift); real2 v34 = load(in, 32 << inShift); real2 v131 = reverse(minus(v98, v34)); real2 v137 = plus(v34, v98); real2 v133 = minusplus(v131, v132); real2 v135 = minusplus(uminus(v131), v132); real2 v149 = timesminusplus(reverse(v135), load(tbl, 4 * VECWIDTH + tbloffset), times(v135, load(tbl, 5 * VECWIDTH + tbloffset))); real2 v2566 = minus(v469, v149); real2 v2570 = plus(v149, v469); real2 v772 = minus(v137, v136); real2 v776 = plus(v136, v137); real2 v1092 = minus(v777, v776); real2 v1096 = plus(v776, v777); real2 v773 = minusplus(v771, v772); real2 v775 = minusplus(uminus(v771), v772); real2 v783 = timesminusplus(reverse(v773), load(tbl, 130 * VECWIDTH + tbloffset), times(v773, load(tbl, 131 * VECWIDTH + tbloffset))); real2 v789 = timesminusplus(reverse(v775), load(tbl, 132 * VECWIDTH + tbloffset), times(v775, load(tbl, 133 * VECWIDTH + tbloffset))); real2 v74 = load(in, 72 << inShift); real2 v10 = load(in, 8 << inShift); real2 v296 = plus(v10, v74); real2 v292 = minus(v74, v10); real2 v42 = load(in, 40 << inShift); real2 v106 = load(in, 104 << inShift); real2 v291 = reverse(minus(v106, v42)); real2 v297 = plus(v42, v106); real2 v293 = minusplus(v291, v292); real2 v295 = minusplus(uminus(v291), v292); real2 v309 = timesminusplus(reverse(v295), load(tbl, 36 * VECWIDTH + tbloffset), times(v295, load(tbl, 37 * VECWIDTH + tbloffset))); real2 v932 = minus(v297, v296); real2 v936 = plus(v296, v297); real2 v122 = load(in, 120 << inShift); real2 v58 = load(in, 56 << inShift); real2 v617 = plus(v58, v122); real2 v611 = reverse(minus(v122, v58)); real2 v26 = load(in, 24 << inShift); real2 v90 = load(in, 88 << inShift); real2 v612 = minus(v90, v26); real2 v616 = plus(v26, v90); real2 v937 = plus(v616, v617); real2 v931 = reverse(minus(v617, v616)); real2 v1091 = reverse(minus(v937, v936)); real2 v1097 = plus(v936, v937); real2 v933 = minusplus(v931, v932); real2 v935 = minusplus(uminus(v931), v932); real2 v1093 = minusplus(v1091, v1092); real2 
v1095 = minusplus(uminus(v1091), v1092); real2 v1103 = timesminusplus(reverse(v1093), load(tbl, 194 * VECWIDTH + tbloffset), times(v1093, load(tbl, 195 * VECWIDTH + tbloffset))); real2 v1468 = plus(v1103, v1183); real2 v1464 = minus(v1183, v1103); real2 v1508 = plus(v1468, v1469); real2 v1504 = minus(v1469, v1468); real2 v1252 = minus(v1097, v1096); real2 v1256 = plus(v1096, v1097); real2 v1336 = plus(v1256, v1257); real2 v1332 = minus(v1257, v1256); real2 v1335 = minusplus(uminus(v1331), v1332); real2 v1333 = minusplus(v1331, v1332); real2 v1343 = timesminusplus(reverse(v1333), load(tbl, 242 * VECWIDTH + tbloffset), times(v1333, load(tbl, 243 * VECWIDTH + tbloffset))); real2 v1349 = timesminusplus(reverse(v1335), load(tbl, 244 * VECWIDTH + tbloffset), times(v1335, load(tbl, 245 * VECWIDTH + tbloffset))); real2 v1376 = plus(v1336, v1337); real2 v1372 = minus(v1337, v1336); real2 v1465 = minusplus(v1463, v1464); real2 v1467 = minusplus(uminus(v1463), v1464); real2 v1255 = minusplus(uminus(v1251), v1252); real2 v1253 = minusplus(v1251, v1252); real2 v1481 = timesminusplus(reverse(v1467), load(tbl, 264 * VECWIDTH + tbloffset), times(v1467, load(tbl, 265 * VECWIDTH + tbloffset))); real2 v1475 = timesminusplus(reverse(v1465), load(tbl, 262 * VECWIDTH + tbloffset), times(v1465, load(tbl, 263 * VECWIDTH + tbloffset))); real2 v1109 = timesminusplus(reverse(v1095), load(tbl, 196 * VECWIDTH + tbloffset), times(v1095, load(tbl, 197 * VECWIDTH + tbloffset))); real2 v1542 = minus(v1189, v1109); real2 v1546 = plus(v1109, v1189); real2 v1545 = minusplus(uminus(v1541), v1542); real2 v1543 = minusplus(v1541, v1542); real2 v1553 = timesminusplus(reverse(v1543), load(tbl, 274 * VECWIDTH + tbloffset), times(v1543, load(tbl, 275 * VECWIDTH + tbloffset))); real2 v1559 = timesminusplus(reverse(v1545), load(tbl, 276 * VECWIDTH + tbloffset), times(v1545, load(tbl, 277 * VECWIDTH + tbloffset))); real2 v1582 = minus(v1547, v1546); real2 v1586 = plus(v1546, v1547); real2 v1269 = 
timesminusplus(reverse(v1255), load(tbl, 228 * VECWIDTH + tbloffset), times(v1255, load(tbl, 229 * VECWIDTH + tbloffset))); real2 v1438 = minus(v1309, v1269); real2 v1442 = plus(v1269, v1309); real2 v1263 = timesminusplus(reverse(v1253), load(tbl, 226 * VECWIDTH + tbloffset), times(v1253, load(tbl, 227 * VECWIDTH + tbloffset))); real2 v943 = timesminusplus(reverse(v933), load(tbl, 162 * VECWIDTH + tbloffset), times(v933, load(tbl, 163 * VECWIDTH + tbloffset))); real2 v1624 = plus(v783, v943); real2 v1620 = minus(v943, v783); real2 v1623 = minusplus(uminus(v1619), v1620); real2 v1621 = minusplus(v1619, v1620); real2 v1700 = minus(v1625, v1624); real2 v1704 = plus(v1624, v1625); real2 v1631 = timesminusplus(reverse(v1621), load(tbl, 286 * VECWIDTH + tbloffset), times(v1621, load(tbl, 287 * VECWIDTH + tbloffset))); real2 v949 = timesminusplus(reverse(v935), load(tbl, 164 * VECWIDTH + tbloffset), times(v935, load(tbl, 165 * VECWIDTH + tbloffset))); real2 v1830 = minus(v949, v789); real2 v1834 = plus(v789, v949); real2 v1782 = plus(v1631, v1671); real2 v1778 = minus(v1671, v1631); real2 v1910 = minus(v1835, v1834); real2 v1914 = plus(v1834, v1835); real2 v1950 = minus(v1915, v1914); real2 v1954 = plus(v1914, v1915); real2 v1913 = minusplus(uminus(v1909), v1910); real2 v1911 = minusplus(v1909, v1910); real2 v613 = minusplus(v611, v612); real2 v615 = minusplus(uminus(v611), v612); real2 v629 = timesminusplus(reverse(v615), load(tbl, 100 * VECWIDTH + tbloffset), times(v615, load(tbl, 101 * VECWIDTH + tbloffset))); real2 v1744 = plus(v1704, v1705); real2 v1740 = minus(v1705, v1704); real2 v1637 = timesminusplus(reverse(v1623), load(tbl, 288 * VECWIDTH + tbloffset), times(v1623, load(tbl, 289 * VECWIDTH + tbloffset))); real2 v1927 = timesminusplus(reverse(v1913), load(tbl, 340 * VECWIDTH + tbloffset), times(v1913, load(tbl, 341 * VECWIDTH + tbloffset))); real2 v2571 = plus(v309, v629); real2 v2565 = reverse(minus(v629, v309)); real2 v1833 = minusplus(uminus(v1829), v1830); 
real2 v1831 = minusplus(v1829, v1830); real2 v1921 = timesminusplus(reverse(v1911), load(tbl, 338 * VECWIDTH + tbloffset), times(v1911, load(tbl, 339 * VECWIDTH + tbloffset))); real2 v1804 = minus(v1677, v1637); real2 v1808 = plus(v1637, v1677); real2 v1847 = timesminusplus(reverse(v1833), load(tbl, 324 * VECWIDTH + tbloffset), times(v1833, load(tbl, 325 * VECWIDTH + tbloffset))); real2 v2014 = minus(v1887, v1847); real2 v2018 = plus(v1847, v1887); real2 v1841 = timesminusplus(reverse(v1831), load(tbl, 322 * VECWIDTH + tbloffset), times(v1831, load(tbl, 323 * VECWIDTH + tbloffset))); real2 v1988 = minus(v1881, v1841); real2 v1992 = plus(v1841, v1881); real2 v1703 = minusplus(uminus(v1699), v1700); real2 v1701 = minusplus(v1699, v1700); real2 v1717 = timesminusplus(reverse(v1703), load(tbl, 304 * VECWIDTH + tbloffset), times(v1703, load(tbl, 305 * VECWIDTH + tbloffset))); real2 v1711 = timesminusplus(reverse(v1701), load(tbl, 302 * VECWIDTH + tbloffset), times(v1701, load(tbl, 303 * VECWIDTH + tbloffset))); real2 v2730 = plus(v2570, v2571); real2 v2726 = minus(v2571, v2570); real2 v1412 = minus(v1303, v1263); real2 v1416 = plus(v1263, v1303); real2 v63 = load(in, 61 << inShift); real2 v127 = load(in, 125 << inShift); real2 v717 = plus(v63, v127); real2 v711 = reverse(minus(v127, v63)); real2 v95 = load(in, 93 << inShift); real2 v31 = load(in, 29 << inShift); real2 v712 = minus(v95, v31); real2 v716 = plus(v31, v95); real2 v1037 = plus(v716, v717); real2 v1031 = reverse(minus(v717, v716)); real2 v79 = load(in, 77 << inShift); real2 v15 = load(in, 13 << inShift); real2 v396 = plus(v15, v79); real2 v392 = minus(v79, v15); real2 v111 = load(in, 109 << inShift); real2 v47 = load(in, 45 << inShift); real2 v397 = plus(v47, v111); real2 v391 = reverse(minus(v111, v47)); real2 v1032 = minus(v397, v396); real2 v1036 = plus(v396, v397); real2 v1033 = minusplus(v1031, v1032); real2 v1035 = minusplus(uminus(v1031), v1032); real2 v1049 = timesminusplus(reverse(v1035), load(tbl, 
184 * VECWIDTH + tbloffset), times(v1035, load(tbl, 185 * VECWIDTH + tbloffset))); real2 v1043 = timesminusplus(reverse(v1033), load(tbl, 182 * VECWIDTH + tbloffset), times(v1033, load(tbl, 183 * VECWIDTH + tbloffset))); real2 v1197 = plus(v1036, v1037); real2 v1191 = reverse(minus(v1037, v1036)); real2 v23 = load(in, 21 << inShift); real2 v87 = load(in, 85 << inShift); real2 v556 = plus(v23, v87); real2 v552 = minus(v87, v23); real2 v119 = load(in, 117 << inShift); real2 v55 = load(in, 53 << inShift); real2 v557 = plus(v55, v119); real2 v551 = reverse(minus(v119, v55)); real2 v877 = plus(v556, v557); real2 v871 = reverse(minus(v557, v556)); real2 v7 = load(in, 5 << inShift); real2 v71 = load(in, 69 << inShift); real2 v232 = minus(v71, v7); real2 v236 = plus(v7, v71); real2 v103 = load(in, 101 << inShift); real2 v39 = load(in, 37 << inShift); real2 v237 = plus(v39, v103); real2 v231 = reverse(minus(v103, v39)); real2 v876 = plus(v236, v237); real2 v872 = minus(v237, v236); real2 v1192 = minus(v877, v876); real2 v1196 = plus(v876, v877); real2 v1271 = reverse(minus(v1197, v1196)); real2 v1277 = plus(v1196, v1197); real2 v875 = minusplus(uminus(v871), v872); real2 v873 = minusplus(v871, v872); real2 v883 = timesminusplus(reverse(v873), load(tbl, 150 * VECWIDTH + tbloffset), times(v873, load(tbl, 151 * VECWIDTH + tbloffset))); real2 v1639 = reverse(minus(v1043, v883)); real2 v1645 = plus(v883, v1043); real2 v1195 = minusplus(uminus(v1191), v1192); real2 v1193 = minusplus(v1191, v1192); real2 v1209 = timesminusplus(reverse(v1195), load(tbl, 216 * VECWIDTH + tbloffset), times(v1195, load(tbl, 217 * VECWIDTH + tbloffset))); real2 v1203 = timesminusplus(reverse(v1193), load(tbl, 214 * VECWIDTH + tbloffset), times(v1193, load(tbl, 215 * VECWIDTH + tbloffset))); real2 v83 = load(in, 81 << inShift); real2 v19 = load(in, 17 << inShift); real2 v476 = plus(v19, v83); real2 v472 = minus(v83, v19); real2 v51 = load(in, 49 << inShift); real2 v115 = load(in, 113 << inShift); real2 
v477 = plus(v51, v115); real2 v471 = reverse(minus(v115, v51)); real2 v797 = plus(v476, v477); real2 v791 = reverse(minus(v477, v476)); real2 v3 = load(in, 1 << inShift); real2 v67 = load(in, 65 << inShift); real2 v156 = plus(v3, v67); real2 v152 = minus(v67, v3); real2 v35 = load(in, 33 << inShift); real2 v99 = load(in, 97 << inShift); real2 v157 = plus(v35, v99); real2 v151 = reverse(minus(v99, v35)); real2 v792 = minus(v157, v156); real2 v796 = plus(v156, v157); real2 v793 = minusplus(v791, v792); real2 v795 = minusplus(uminus(v791), v792); real2 v803 = timesminusplus(reverse(v793), load(tbl, 134 * VECWIDTH + tbloffset), times(v793, load(tbl, 135 * VECWIDTH + tbloffset))); real2 v1112 = minus(v797, v796); real2 v1116 = plus(v796, v797); real2 v107 = load(in, 105 << inShift); real2 v43 = load(in, 41 << inShift); real2 v317 = plus(v43, v107); real2 v311 = reverse(minus(v107, v43)); real2 v75 = load(in, 73 << inShift); real2 v11 = load(in, 9 << inShift); real2 v316 = plus(v11, v75); real2 v312 = minus(v75, v11); real2 v956 = plus(v316, v317); real2 v952 = minus(v317, v316); real2 v59 = load(in, 57 << inShift); real2 v123 = load(in, 121 << inShift); real2 v631 = reverse(minus(v123, v59)); real2 v637 = plus(v59, v123); real2 v27 = load(in, 25 << inShift); real2 v91 = load(in, 89 << inShift); real2 v636 = plus(v27, v91); real2 v632 = minus(v91, v27); real2 v957 = plus(v636, v637); real2 v951 = reverse(minus(v637, v636)); real2 v1111 = reverse(minus(v957, v956)); real2 v1117 = plus(v956, v957); real2 v1276 = plus(v1116, v1117); real2 v1272 = minus(v1117, v1116); real2 v1275 = minusplus(uminus(v1271), v1272); real2 v1273 = minusplus(v1271, v1272); real2 v1283 = timesminusplus(reverse(v1273), load(tbl, 230 * VECWIDTH + tbloffset), times(v1273, load(tbl, 231 * VECWIDTH + tbloffset))); real2 v1352 = minus(v1277, v1276); real2 v1356 = plus(v1276, v1277); real2 v1289 = timesminusplus(reverse(v1275), load(tbl, 232 * VECWIDTH + tbloffset), times(v1275, load(tbl, 233 * VECWIDTH 
+ tbloffset))); real2 v1115 = minusplus(uminus(v1111), v1112); real2 v1113 = minusplus(v1111, v1112); real2 v1123 = timesminusplus(reverse(v1113), load(tbl, 198 * VECWIDTH + tbloffset), times(v1113, load(tbl, 199 * VECWIDTH + tbloffset))); real2 v1129 = timesminusplus(reverse(v1115), load(tbl, 200 * VECWIDTH + tbloffset), times(v1115, load(tbl, 201 * VECWIDTH + tbloffset))); real2 v1488 = plus(v1123, v1203); real2 v1484 = minus(v1203, v1123); real2 v1566 = plus(v1129, v1209); real2 v1562 = minus(v1209, v1129); real2 v85 = load(in, 83 << inShift); real2 v21 = load(in, 19 << inShift); real2 v512 = minus(v85, v21); real2 v516 = plus(v21, v85); real2 v117 = load(in, 115 << inShift); real2 v53 = load(in, 51 << inShift); real2 v517 = plus(v53, v117); real2 v511 = reverse(minus(v117, v53)); real2 v831 = reverse(minus(v517, v516)); real2 v837 = plus(v516, v517); real2 v69 = load(in, 67 << inShift); real2 v5 = load(in, 3 << inShift); real2 v192 = minus(v69, v5); real2 v196 = plus(v5, v69); real2 v37 = load(in, 35 << inShift); real2 v101 = load(in, 99 << inShift); real2 v197 = plus(v37, v101); real2 v191 = reverse(minus(v101, v37)); real2 v832 = minus(v197, v196); real2 v836 = plus(v196, v197); real2 v1152 = minus(v837, v836); real2 v1156 = plus(v836, v837); real2 v61 = load(in, 59 << inShift); real2 v125 = load(in, 123 << inShift); real2 v677 = plus(v61, v125); real2 v671 = reverse(minus(v125, v61)); real2 v29 = load(in, 27 << inShift); real2 v93 = load(in, 91 << inShift); real2 v672 = minus(v93, v29); real2 v676 = plus(v29, v93); real2 v997 = plus(v676, v677); real2 v991 = reverse(minus(v677, v676)); real2 v109 = load(in, 107 << inShift); real2 v45 = load(in, 43 << inShift); real2 v357 = plus(v45, v109); real2 v351 = reverse(minus(v109, v45)); real2 v77 = load(in, 75 << inShift); real2 v13 = load(in, 11 << inShift); real2 v352 = minus(v77, v13); real2 v356 = plus(v13, v77); real2 v992 = minus(v357, v356); real2 v996 = plus(v356, v357); real2 v1157 = plus(v996, v997); real2 
v1151 = reverse(minus(v997, v996)); real2 v1155 = minusplus(uminus(v1151), v1152); real2 v1153 = minusplus(v1151, v1152); real2 v1163 = timesminusplus(reverse(v1153), load(tbl, 206 * VECWIDTH + tbloffset), times(v1153, load(tbl, 207 * VECWIDTH + tbloffset))); real2 v1316 = plus(v1156, v1157); real2 v1312 = minus(v1157, v1156); real2 v41 = load(in, 39 << inShift); real2 v105 = load(in, 103 << inShift); real2 v277 = plus(v41, v105); real2 v271 = reverse(minus(v105, v41)); real2 v9 = load(in, 7 << inShift); real2 v73 = load(in, 71 << inShift); real2 v276 = plus(v9, v73); real2 v272 = minus(v73, v9); real2 v916 = plus(v276, v277); real2 v912 = minus(v277, v276); real2 v89 = load(in, 87 << inShift); real2 v25 = load(in, 23 << inShift); real2 v592 = minus(v89, v25); real2 v596 = plus(v25, v89); real2 v57 = load(in, 55 << inShift); real2 v121 = load(in, 119 << inShift); real2 v591 = reverse(minus(v121, v57)); real2 v597 = plus(v57, v121); real2 v911 = reverse(minus(v597, v596)); real2 v917 = plus(v596, v597); real2 v1236 = plus(v916, v917); real2 v1232 = minus(v917, v916); real2 v81 = load(in, 79 << inShift); real2 v17 = load(in, 15 << inShift); real2 v432 = minus(v81, v17); real2 v436 = plus(v17, v81); real2 v113 = load(in, 111 << inShift); real2 v49 = load(in, 47 << inShift); real2 v437 = plus(v49, v113); real2 v431 = reverse(minus(v113, v49)); real2 v1072 = minus(v437, v436); real2 v1076 = plus(v436, v437); real2 v65 = load(in, 63 << inShift); real2 v129 = load(in, 127 << inShift); real2 v757 = plus(v65, v129); real2 v751 = reverse(minus(v129, v65)); real2 v97 = load(in, 95 << inShift); real2 v33 = load(in, 31 << inShift); real2 v752 = minus(v97, v33); real2 v756 = plus(v33, v97); real2 v1077 = plus(v756, v757); real2 v1071 = reverse(minus(v757, v756)); real2 v1231 = reverse(minus(v1077, v1076)); real2 v1237 = plus(v1076, v1077); real2 v1317 = plus(v1236, v1237); real2 v1311 = reverse(minus(v1237, v1236)); real2 v1351 = reverse(minus(v1317, v1316)); real2 v1357 = 
plus(v1316, v1317); real2 v1371 = reverse(minus(v1357, v1356)); real2 v1377 = plus(v1356, v1357); scatter(out, 0, 128, plus(v1376, v1377)); real2 v1390 = minus(v1376, v1377); scatter(out, 64, 128, timesminusplus(v1390, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1390), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1353 = minusplus(v1351, v1352); real2 v1355 = minusplus(uminus(v1351), v1352); real2 v1369 = timesminusplus(reverse(v1355), load(tbl, 248 * VECWIDTH + tbloffset), times(v1355, load(tbl, 249 * VECWIDTH + tbloffset))); scatter(out, 48, 128, plus(v1349, v1369)); real2 v1404 = minus(v1349, v1369); scatter(out, 112, 128, timesminusplus(v1404, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1404), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1363 = timesminusplus(reverse(v1353), load(tbl, 246 * VECWIDTH + tbloffset), times(v1353, load(tbl, 247 * VECWIDTH + tbloffset))); scatter(out, 16, 128, plus(v1343, v1363)); real2 v1398 = minus(v1343, v1363); scatter(out, 80, 128, timesminusplus(v1398, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1398), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1373 = minusplus(v1371, v1372); real2 v1375 = minusplus(uminus(v1371), v1372); scatter(out, 96, 128, timesminusplus(reverse(v1375), load(tbl, 252 * VECWIDTH + tbloffset), times(v1375, load(tbl, 253 * VECWIDTH + tbloffset)))); scatter(out, 32, 128, timesminusplus(reverse(v1373), load(tbl, 250 * VECWIDTH + tbloffset), times(v1373, load(tbl, 251 * VECWIDTH + tbloffset)))); real2 v1313 = minusplus(v1311, v1312); real2 v1315 = minusplus(uminus(v1311), v1312); real2 v1323 = timesminusplus(reverse(v1313), load(tbl, 238 * VECWIDTH + tbloffset), times(v1313, load(tbl, 239 * VECWIDTH + tbloffset))); real2 v1417 = plus(v1283, v1323); real2 v1411 = reverse(minus(v1323, v1283)); scatter(out, 8, 128, plus(v1416, v1417)); real2 v1430 = minus(v1416, v1417); scatter(out, 72, 128, timesminusplus(v1430, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1430), load(tbl, 1 * 
VECWIDTH + tbloffset)))); real2 v1413 = minusplus(v1411, v1412); real2 v1415 = minusplus(uminus(v1411), v1412); scatter(out, 104, 128, timesminusplus(reverse(v1415), load(tbl, 256 * VECWIDTH + tbloffset), times(v1415, load(tbl, 257 * VECWIDTH + tbloffset)))); scatter(out, 40, 128, timesminusplus(reverse(v1413), load(tbl, 254 * VECWIDTH + tbloffset), times(v1413, load(tbl, 255 * VECWIDTH + tbloffset)))); real2 v1329 = timesminusplus(reverse(v1315), load(tbl, 240 * VECWIDTH + tbloffset), times(v1315, load(tbl, 241 * VECWIDTH + tbloffset))); real2 v1443 = plus(v1289, v1329); real2 v1437 = reverse(minus(v1329, v1289)); scatter(out, 24, 128, plus(v1442, v1443)); real2 v1456 = minus(v1442, v1443); scatter(out, 88, 128, timesminusplus(v1456, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1456), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1441 = minusplus(uminus(v1437), v1438); real2 v1439 = minusplus(v1437, v1438); scatter(out, 120, 128, timesminusplus(reverse(v1441), load(tbl, 260 * VECWIDTH + tbloffset), times(v1441, load(tbl, 261 * VECWIDTH + tbloffset)))); scatter(out, 56, 128, timesminusplus(reverse(v1439), load(tbl, 258 * VECWIDTH + tbloffset), times(v1439, load(tbl, 259 * VECWIDTH + tbloffset)))); real2 v1235 = minusplus(uminus(v1231), v1232); real2 v1233 = minusplus(v1231, v1232); real2 v1243 = timesminusplus(reverse(v1233), load(tbl, 222 * VECWIDTH + tbloffset), times(v1233, load(tbl, 223 * VECWIDTH + tbloffset))); real2 v1489 = plus(v1163, v1243); real2 v1483 = reverse(minus(v1243, v1163)); real2 v1509 = plus(v1488, v1489); real2 v1503 = reverse(minus(v1489, v1488)); scatter(out, 4, 128, plus(v1508, v1509)); real2 v1522 = minus(v1508, v1509); scatter(out, 68, 128, timesminusplus(v1522, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1522), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1507 = minusplus(uminus(v1503), v1504); real2 v1505 = minusplus(v1503, v1504); scatter(out, 36, 128, timesminusplus(reverse(v1505), load(tbl, 270 * VECWIDTH + 
tbloffset), times(v1505, load(tbl, 271 * VECWIDTH + tbloffset)))); scatter(out, 100, 128, timesminusplus(reverse(v1507), load(tbl, 272 * VECWIDTH + tbloffset), times(v1507, load(tbl, 273 * VECWIDTH + tbloffset)))); real2 v1485 = minusplus(v1483, v1484); real2 v1487 = minusplus(uminus(v1483), v1484); real2 v1501 = timesminusplus(reverse(v1487), load(tbl, 268 * VECWIDTH + tbloffset), times(v1487, load(tbl, 269 * VECWIDTH + tbloffset))); scatter(out, 52, 128, plus(v1481, v1501)); real2 v1534 = minus(v1481, v1501); scatter(out, 116, 128, timesminusplus(v1534, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1534), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1495 = timesminusplus(reverse(v1485), load(tbl, 266 * VECWIDTH + tbloffset), times(v1485, load(tbl, 267 * VECWIDTH + tbloffset))); scatter(out, 20, 128, plus(v1475, v1495)); real2 v1528 = minus(v1475, v1495); scatter(out, 84, 128, timesminusplus(v1528, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1528), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1249 = timesminusplus(reverse(v1235), load(tbl, 224 * VECWIDTH + tbloffset), times(v1235, load(tbl, 225 * VECWIDTH + tbloffset))); real2 v1169 = timesminusplus(reverse(v1155), load(tbl, 208 * VECWIDTH + tbloffset), times(v1155, load(tbl, 209 * VECWIDTH + tbloffset))); real2 v1567 = plus(v1169, v1249); real2 v1561 = reverse(minus(v1249, v1169)); real2 v1581 = reverse(minus(v1567, v1566)); real2 v1587 = plus(v1566, v1567); scatter(out, 12, 128, plus(v1586, v1587)); real2 v1600 = minus(v1586, v1587); scatter(out, 76, 128, timesminusplus(v1600, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1600), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1583 = minusplus(v1581, v1582); scatter(out, 44, 128, timesminusplus(reverse(v1583), load(tbl, 282 * VECWIDTH + tbloffset), times(v1583, load(tbl, 283 * VECWIDTH + tbloffset)))); real2 v1585 = minusplus(uminus(v1581), v1582); scatter(out, 108, 128, timesminusplus(reverse(v1585), load(tbl, 284 * VECWIDTH + tbloffset), 
times(v1585, load(tbl, 285 * VECWIDTH + tbloffset)))); real2 v1565 = minusplus(uminus(v1561), v1562); real2 v1563 = minusplus(v1561, v1562); real2 v1579 = timesminusplus(reverse(v1565), load(tbl, 280 * VECWIDTH + tbloffset), times(v1565, load(tbl, 281 * VECWIDTH + tbloffset))); scatter(out, 60, 128, plus(v1559, v1579)); real2 v1612 = minus(v1559, v1579); scatter(out, 124, 128, timesminusplus(v1612, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1612), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1573 = timesminusplus(reverse(v1563), load(tbl, 278 * VECWIDTH + tbloffset), times(v1563, load(tbl, 279 * VECWIDTH + tbloffset))); scatter(out, 28, 128, plus(v1553, v1573)); real2 v1606 = minus(v1553, v1573); scatter(out, 92, 128, timesminusplus(v1606, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1606), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v833 = minusplus(v831, v832); real2 v835 = minusplus(uminus(v831), v832); real2 v955 = minusplus(uminus(v951), v952); real2 v953 = minusplus(v951, v952); real2 v963 = timesminusplus(reverse(v953), load(tbl, 166 * VECWIDTH + tbloffset), times(v953, load(tbl, 167 * VECWIDTH + tbloffset))); real2 v995 = minusplus(uminus(v991), v992); real2 v993 = minusplus(v991, v992); real2 v1003 = timesminusplus(reverse(v993), load(tbl, 174 * VECWIDTH + tbloffset), times(v993, load(tbl, 175 * VECWIDTH + tbloffset))); real2 v843 = timesminusplus(reverse(v833), load(tbl, 142 * VECWIDTH + tbloffset), times(v833, load(tbl, 143 * VECWIDTH + tbloffset))); real2 v1640 = minus(v963, v803); real2 v1644 = plus(v803, v963); real2 v1680 = minus(v1003, v843); real2 v1684 = plus(v843, v1003); real2 v1641 = minusplus(v1639, v1640); real2 v1643 = minusplus(uminus(v1639), v1640); real2 v1657 = timesminusplus(reverse(v1643), load(tbl, 292 * VECWIDTH + tbloffset), times(v1643, load(tbl, 293 * VECWIDTH + tbloffset))); real2 v913 = minusplus(v911, v912); real2 v915 = minusplus(uminus(v911), v912); real2 v1073 = minusplus(v1071, v1072); real2 v1075 = 
minusplus(uminus(v1071), v1072); real2 v923 = timesminusplus(reverse(v913), load(tbl, 158 * VECWIDTH + tbloffset), times(v913, load(tbl, 159 * VECWIDTH + tbloffset))); real2 v1083 = timesminusplus(reverse(v1073), load(tbl, 190 * VECWIDTH + tbloffset), times(v1073, load(tbl, 191 * VECWIDTH + tbloffset))); real2 v1685 = plus(v923, v1083); real2 v1679 = reverse(minus(v1083, v923)); real2 v1681 = minusplus(v1679, v1680); real2 v1683 = minusplus(uminus(v1679), v1680); real2 v1697 = timesminusplus(reverse(v1683), load(tbl, 300 * VECWIDTH + tbloffset), times(v1683, load(tbl, 301 * VECWIDTH + tbloffset))); real2 v1809 = plus(v1657, v1697); real2 v1803 = reverse(minus(v1697, v1657)); scatter(out, 26, 128, plus(v1808, v1809)); real2 v1822 = minus(v1808, v1809); scatter(out, 90, 128, timesminusplus(v1822, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1822), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1807 = minusplus(uminus(v1803), v1804); real2 v1805 = minusplus(v1803, v1804); scatter(out, 58, 128, timesminusplus(reverse(v1805), load(tbl, 318 * VECWIDTH + tbloffset), times(v1805, load(tbl, 319 * VECWIDTH + tbloffset)))); scatter(out, 122, 128, timesminusplus(reverse(v1807), load(tbl, 320 * VECWIDTH + tbloffset), times(v1807, load(tbl, 321 * VECWIDTH + tbloffset)))); real2 v1651 = timesminusplus(reverse(v1641), load(tbl, 290 * VECWIDTH + tbloffset), times(v1641, load(tbl, 291 * VECWIDTH + tbloffset))); real2 v1691 = timesminusplus(reverse(v1681), load(tbl, 298 * VECWIDTH + tbloffset), times(v1681, load(tbl, 299 * VECWIDTH + tbloffset))); real2 v1783 = plus(v1651, v1691); real2 v1777 = reverse(minus(v1691, v1651)); real2 v1779 = minusplus(v1777, v1778); real2 v1781 = minusplus(uminus(v1777), v1778); scatter(out, 106, 128, timesminusplus(reverse(v1781), load(tbl, 316 * VECWIDTH + tbloffset), times(v1781, load(tbl, 317 * VECWIDTH + tbloffset)))); scatter(out, 42, 128, timesminusplus(reverse(v1779), load(tbl, 314 * VECWIDTH + tbloffset), times(v1779, load(tbl, 315 * 
VECWIDTH + tbloffset)))); scatter(out, 10, 128, plus(v1782, v1783)); real2 v1796 = minus(v1782, v1783); scatter(out, 74, 128, timesminusplus(v1796, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1796), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1720 = minus(v1645, v1644); real2 v1724 = plus(v1644, v1645); real2 v1719 = reverse(minus(v1685, v1684)); real2 v1725 = plus(v1684, v1685); real2 v1745 = plus(v1724, v1725); real2 v1739 = reverse(minus(v1725, v1724)); scatter(out, 2, 128, plus(v1744, v1745)); real2 v1758 = minus(v1744, v1745); scatter(out, 66, 128, timesminusplus(v1758, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1758), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1741 = minusplus(v1739, v1740); real2 v1743 = minusplus(uminus(v1739), v1740); scatter(out, 98, 128, timesminusplus(reverse(v1743), load(tbl, 312 * VECWIDTH + tbloffset), times(v1743, load(tbl, 313 * VECWIDTH + tbloffset)))); scatter(out, 34, 128, timesminusplus(reverse(v1741), load(tbl, 310 * VECWIDTH + tbloffset), times(v1741, load(tbl, 311 * VECWIDTH + tbloffset)))); real2 v1723 = minusplus(uminus(v1719), v1720); real2 v1721 = minusplus(v1719, v1720); real2 v1737 = timesminusplus(reverse(v1723), load(tbl, 308 * VECWIDTH + tbloffset), times(v1723, load(tbl, 309 * VECWIDTH + tbloffset))); scatter(out, 50, 128, plus(v1717, v1737)); real2 v1770 = minus(v1717, v1737); scatter(out, 114, 128, timesminusplus(v1770, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1770), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1731 = timesminusplus(reverse(v1721), load(tbl, 306 * VECWIDTH + tbloffset), times(v1721, load(tbl, 307 * VECWIDTH + tbloffset))); scatter(out, 18, 128, plus(v1711, v1731)); real2 v1764 = minus(v1711, v1731); scatter(out, 82, 128, timesminusplus(v1764, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1764), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v809 = timesminusplus(reverse(v795), load(tbl, 136 * VECWIDTH + tbloffset), times(v795, load(tbl, 137 * VECWIDTH + 
tbloffset))); real2 v969 = timesminusplus(reverse(v955), load(tbl, 168 * VECWIDTH + tbloffset), times(v955, load(tbl, 169 * VECWIDTH + tbloffset))); real2 v1850 = minus(v969, v809); real2 v1854 = plus(v809, v969); real2 v849 = timesminusplus(reverse(v835), load(tbl, 144 * VECWIDTH + tbloffset), times(v835, load(tbl, 145 * VECWIDTH + tbloffset))); real2 v929 = timesminusplus(reverse(v915), load(tbl, 160 * VECWIDTH + tbloffset), times(v915, load(tbl, 161 * VECWIDTH + tbloffset))); real2 v889 = timesminusplus(reverse(v875), load(tbl, 152 * VECWIDTH + tbloffset), times(v875, load(tbl, 153 * VECWIDTH + tbloffset))); real2 v1089 = timesminusplus(reverse(v1075), load(tbl, 192 * VECWIDTH + tbloffset), times(v1075, load(tbl, 193 * VECWIDTH + tbloffset))); real2 v1009 = timesminusplus(reverse(v995), load(tbl, 176 * VECWIDTH + tbloffset), times(v995, load(tbl, 177 * VECWIDTH + tbloffset))); real2 v1890 = minus(v1009, v849); real2 v1894 = plus(v849, v1009); real2 v1849 = reverse(minus(v1049, v889)); real2 v1855 = plus(v889, v1049); real2 v1930 = minus(v1855, v1854); real2 v1934 = plus(v1854, v1855); real2 v1895 = plus(v929, v1089); real2 v1889 = reverse(minus(v1089, v929)); real2 v1929 = reverse(minus(v1895, v1894)); real2 v1935 = plus(v1894, v1895); real2 v1955 = plus(v1934, v1935); real2 v1949 = reverse(minus(v1935, v1934)); scatter(out, 6, 128, plus(v1954, v1955)); real2 v1968 = minus(v1954, v1955); scatter(out, 70, 128, timesminusplus(v1968, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1968), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1951 = minusplus(v1949, v1950); scatter(out, 38, 128, timesminusplus(reverse(v1951), load(tbl, 346 * VECWIDTH + tbloffset), times(v1951, load(tbl, 347 * VECWIDTH + tbloffset)))); real2 v1953 = minusplus(uminus(v1949), v1950); scatter(out, 102, 128, timesminusplus(reverse(v1953), load(tbl, 348 * VECWIDTH + tbloffset), times(v1953, load(tbl, 349 * VECWIDTH + tbloffset)))); real2 v1931 = minusplus(v1929, v1930); real2 v1933 = 
minusplus(uminus(v1929), v1930); real2 v1947 = timesminusplus(reverse(v1933), load(tbl, 344 * VECWIDTH + tbloffset), times(v1933, load(tbl, 345 * VECWIDTH + tbloffset))); scatter(out, 54, 128, plus(v1927, v1947)); real2 v1980 = minus(v1927, v1947); scatter(out, 118, 128, timesminusplus(v1980, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1980), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1941 = timesminusplus(reverse(v1931), load(tbl, 342 * VECWIDTH + tbloffset), times(v1931, load(tbl, 343 * VECWIDTH + tbloffset))); scatter(out, 22, 128, plus(v1921, v1941)); real2 v1974 = minus(v1921, v1941); scatter(out, 86, 128, timesminusplus(v1974, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v1974), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1851 = minusplus(v1849, v1850); real2 v1853 = minusplus(uminus(v1849), v1850); real2 v1867 = timesminusplus(reverse(v1853), load(tbl, 328 * VECWIDTH + tbloffset), times(v1853, load(tbl, 329 * VECWIDTH + tbloffset))); real2 v1891 = minusplus(v1889, v1890); real2 v1893 = minusplus(uminus(v1889), v1890); real2 v1907 = timesminusplus(reverse(v1893), load(tbl, 336 * VECWIDTH + tbloffset), times(v1893, load(tbl, 337 * VECWIDTH + tbloffset))); real2 v2019 = plus(v1867, v1907); real2 v2013 = reverse(minus(v1907, v1867)); scatter(out, 30, 128, plus(v2018, v2019)); real2 v2032 = minus(v2018, v2019); scatter(out, 94, 128, timesminusplus(v2032, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2032), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2017 = minusplus(uminus(v2013), v2014); scatter(out, 126, 128, timesminusplus(reverse(v2017), load(tbl, 356 * VECWIDTH + tbloffset), times(v2017, load(tbl, 357 * VECWIDTH + tbloffset)))); real2 v2015 = minusplus(v2013, v2014); scatter(out, 62, 128, timesminusplus(reverse(v2015), load(tbl, 354 * VECWIDTH + tbloffset), times(v2015, load(tbl, 355 * VECWIDTH + tbloffset)))); real2 v1861 = timesminusplus(reverse(v1851), load(tbl, 326 * VECWIDTH + tbloffset), times(v1851, load(tbl, 327 * 
VECWIDTH + tbloffset))); real2 v1901 = timesminusplus(reverse(v1891), load(tbl, 334 * VECWIDTH + tbloffset), times(v1891, load(tbl, 335 * VECWIDTH + tbloffset))); real2 v1993 = plus(v1861, v1901); real2 v1987 = reverse(minus(v1901, v1861)); scatter(out, 14, 128, plus(v1992, v1993)); real2 v2006 = minus(v1992, v1993); scatter(out, 78, 128, timesminusplus(v2006, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2006), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v1991 = minusplus(uminus(v1987), v1988); scatter(out, 110, 128, timesminusplus(reverse(v1991), load(tbl, 352 * VECWIDTH + tbloffset), times(v1991, load(tbl, 353 * VECWIDTH + tbloffset)))); real2 v1989 = minusplus(v1987, v1988); scatter(out, 46, 128, timesminusplus(reverse(v1989), load(tbl, 350 * VECWIDTH + tbloffset), times(v1989, load(tbl, 351 * VECWIDTH + tbloffset)))); real2 v593 = minusplus(v591, v592); real2 v595 = minusplus(uminus(v591), v592); real2 v473 = minusplus(v471, v472); real2 v475 = minusplus(uminus(v471), v472); real2 v555 = minusplus(uminus(v551), v552); real2 v553 = minusplus(v551, v552); real2 v609 = timesminusplus(reverse(v595), load(tbl, 96 * VECWIDTH + tbloffset), times(v595, load(tbl, 97 * VECWIDTH + tbloffset))); real2 v195 = minusplus(uminus(v191), v192); real2 v193 = minusplus(v191, v192); real2 v275 = minusplus(uminus(v271), v272); real2 v273 = minusplus(v271, v272); real2 v673 = minusplus(v671, v672); real2 v675 = minusplus(uminus(v671), v672); real2 v689 = timesminusplus(reverse(v675), load(tbl, 112 * VECWIDTH + tbloffset), times(v675, load(tbl, 113 * VECWIDTH + tbloffset))); real2 v209 = timesminusplus(reverse(v195), load(tbl, 16 * VECWIDTH + tbloffset), times(v195, load(tbl, 17 * VECWIDTH + tbloffset))); real2 v289 = timesminusplus(reverse(v275), load(tbl, 32 * VECWIDTH + tbloffset), times(v275, load(tbl, 33 * VECWIDTH + tbloffset))); real2 v755 = minusplus(uminus(v751), v752); real2 v753 = minusplus(v751, v752); real2 v435 = minusplus(uminus(v431), v432); real2 v433 = 
minusplus(v431, v432); real2 v513 = minusplus(v511, v512); real2 v515 = minusplus(uminus(v511), v512); real2 v529 = timesminusplus(reverse(v515), load(tbl, 80 * VECWIDTH + tbloffset), times(v515, load(tbl, 81 * VECWIDTH + tbloffset))); real2 v353 = minusplus(v351, v352); real2 v355 = minusplus(uminus(v351), v352); real2 v369 = timesminusplus(reverse(v355), load(tbl, 48 * VECWIDTH + tbloffset), times(v355, load(tbl, 49 * VECWIDTH + tbloffset))); real2 v2631 = plus(v369, v689); real2 v2625 = reverse(minus(v689, v369)); real2 v449 = timesminusplus(reverse(v435), load(tbl, 64 * VECWIDTH + tbloffset), times(v435, load(tbl, 65 * VECWIDTH + tbloffset))); real2 v2710 = plus(v289, v609); real2 v2706 = minus(v609, v289); real2 v2630 = plus(v209, v529); real2 v2626 = minus(v529, v209); real2 v2790 = plus(v2630, v2631); real2 v2786 = minus(v2631, v2630); real2 v713 = minusplus(v711, v712); real2 v715 = minusplus(uminus(v711), v712); real2 v769 = timesminusplus(reverse(v755), load(tbl, 128 * VECWIDTH + tbloffset), times(v755, load(tbl, 129 * VECWIDTH + tbloffset))); real2 v2705 = reverse(minus(v769, v449)); real2 v2711 = plus(v449, v769); real2 v313 = minusplus(v311, v312); real2 v315 = minusplus(uminus(v311), v312); real2 v393 = minusplus(v391, v392); real2 v395 = minusplus(uminus(v391), v392); real2 v409 = timesminusplus(reverse(v395), load(tbl, 56 * VECWIDTH + tbloffset), times(v395, load(tbl, 57 * VECWIDTH + tbloffset))); real2 v729 = timesminusplus(reverse(v715), load(tbl, 120 * VECWIDTH + tbloffset), times(v715, load(tbl, 121 * VECWIDTH + tbloffset))); real2 v329 = timesminusplus(reverse(v315), load(tbl, 40 * VECWIDTH + tbloffset), times(v315, load(tbl, 41 * VECWIDTH + tbloffset))); real2 v489 = timesminusplus(reverse(v475), load(tbl, 72 * VECWIDTH + tbloffset), times(v475, load(tbl, 73 * VECWIDTH + tbloffset))); real2 v153 = minusplus(v151, v152); real2 v155 = minusplus(uminus(v151), v152); real2 v169 = timesminusplus(reverse(v155), load(tbl, 8 * VECWIDTH + tbloffset), 
times(v155, load(tbl, 9 * VECWIDTH + tbloffset))); real2 v2586 = minus(v489, v169); real2 v2590 = plus(v169, v489); real2 v233 = minusplus(v231, v232); real2 v235 = minusplus(uminus(v231), v232); real2 v633 = minusplus(v631, v632); real2 v635 = minusplus(uminus(v631), v632); real2 v649 = timesminusplus(reverse(v635), load(tbl, 104 * VECWIDTH + tbloffset), times(v635, load(tbl, 105 * VECWIDTH + tbloffset))); real2 v249 = timesminusplus(reverse(v235), load(tbl, 24 * VECWIDTH + tbloffset), times(v235, load(tbl, 25 * VECWIDTH + tbloffset))); real2 v569 = timesminusplus(reverse(v555), load(tbl, 88 * VECWIDTH + tbloffset), times(v555, load(tbl, 89 * VECWIDTH + tbloffset))); real2 v2670 = plus(v249, v569); real2 v2666 = minus(v569, v249); real2 v2785 = reverse(minus(v2711, v2710)); real2 v2791 = plus(v2710, v2711); real2 v2825 = reverse(minus(v2791, v2790)); real2 v2831 = plus(v2790, v2791); real2 v2671 = plus(v409, v729); real2 v2665 = reverse(minus(v729, v409)); real2 v2745 = reverse(minus(v2671, v2670)); real2 v2751 = plus(v2670, v2671); real2 v2806 = minus(v2731, v2730); real2 v2810 = plus(v2730, v2731); real2 v2846 = minus(v2811, v2810); real2 v2850 = plus(v2810, v2811); real2 v2591 = plus(v329, v649); real2 v2585 = reverse(minus(v649, v329)); real2 v2750 = plus(v2590, v2591); real2 v2746 = minus(v2591, v2590); real2 v2830 = plus(v2750, v2751); real2 v2826 = minus(v2751, v2750); real2 v2845 = reverse(minus(v2831, v2830)); real2 v2851 = plus(v2830, v2831); scatter(out, 3, 128, plus(v2850, v2851)); real2 v2864 = minus(v2850, v2851); scatter(out, 67, 128, timesminusplus(v2864, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2864), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2849 = minusplus(uminus(v2845), v2846); real2 v2847 = minusplus(v2845, v2846); scatter(out, 35, 128, timesminusplus(reverse(v2847), load(tbl, 506 * VECWIDTH + tbloffset), times(v2847, load(tbl, 507 * VECWIDTH + tbloffset)))); scatter(out, 99, 128, timesminusplus(reverse(v2849), load(tbl, 508 * 
VECWIDTH + tbloffset), times(v2849, load(tbl, 509 * VECWIDTH + tbloffset)))); real2 v2827 = minusplus(v2825, v2826); real2 v2829 = minusplus(uminus(v2825), v2826); real2 v2837 = timesminusplus(reverse(v2827), load(tbl, 502 * VECWIDTH + tbloffset), times(v2827, load(tbl, 503 * VECWIDTH + tbloffset))); real2 v2809 = minusplus(uminus(v2805), v2806); real2 v2807 = minusplus(v2805, v2806); real2 v2817 = timesminusplus(reverse(v2807), load(tbl, 498 * VECWIDTH + tbloffset), times(v2807, load(tbl, 499 * VECWIDTH + tbloffset))); scatter(out, 19, 128, plus(v2817, v2837)); real2 v2870 = minus(v2817, v2837); scatter(out, 83, 128, timesminusplus(v2870, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2870), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2823 = timesminusplus(reverse(v2809), load(tbl, 500 * VECWIDTH + tbloffset), times(v2809, load(tbl, 501 * VECWIDTH + tbloffset))); real2 v2843 = timesminusplus(reverse(v2829), load(tbl, 504 * VECWIDTH + tbloffset), times(v2829, load(tbl, 505 * VECWIDTH + tbloffset))); scatter(out, 51, 128, plus(v2823, v2843)); real2 v2876 = minus(v2823, v2843); scatter(out, 115, 128, timesminusplus(v2876, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2876), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2787 = minusplus(v2785, v2786); real2 v2789 = minusplus(uminus(v2785), v2786); real2 v2803 = timesminusplus(reverse(v2789), load(tbl, 496 * VECWIDTH + tbloffset), times(v2789, load(tbl, 497 * VECWIDTH + tbloffset))); real2 v2727 = minusplus(v2725, v2726); real2 v2729 = minusplus(uminus(v2725), v2726); real2 v2743 = timesminusplus(reverse(v2729), load(tbl, 484 * VECWIDTH + tbloffset), times(v2729, load(tbl, 485 * VECWIDTH + tbloffset))); real2 v2914 = plus(v2743, v2783); real2 v2910 = minus(v2783, v2743); real2 v2749 = minusplus(uminus(v2745), v2746); real2 v2747 = minusplus(v2745, v2746); real2 v2763 = timesminusplus(reverse(v2749), load(tbl, 488 * VECWIDTH + tbloffset), times(v2749, load(tbl, 489 * VECWIDTH + tbloffset))); real2 v2909 = 
reverse(minus(v2803, v2763)); real2 v2915 = plus(v2763, v2803); scatter(out, 27, 128, plus(v2914, v2915)); real2 v2928 = minus(v2914, v2915); scatter(out, 91, 128, timesminusplus(v2928, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2928), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2913 = minusplus(uminus(v2909), v2910); scatter(out, 123, 128, timesminusplus(reverse(v2913), load(tbl, 516 * VECWIDTH + tbloffset), times(v2913, load(tbl, 517 * VECWIDTH + tbloffset)))); real2 v2911 = minusplus(v2909, v2910); scatter(out, 59, 128, timesminusplus(reverse(v2911), load(tbl, 514 * VECWIDTH + tbloffset), times(v2911, load(tbl, 515 * VECWIDTH + tbloffset)))); real2 v2737 = timesminusplus(reverse(v2727), load(tbl, 482 * VECWIDTH + tbloffset), times(v2727, load(tbl, 483 * VECWIDTH + tbloffset))); real2 v2888 = plus(v2737, v2777); real2 v2884 = minus(v2777, v2737); real2 v2797 = timesminusplus(reverse(v2787), load(tbl, 494 * VECWIDTH + tbloffset), times(v2787, load(tbl, 495 * VECWIDTH + tbloffset))); real2 v2757 = timesminusplus(reverse(v2747), load(tbl, 486 * VECWIDTH + tbloffset), times(v2747, load(tbl, 487 * VECWIDTH + tbloffset))); real2 v2889 = plus(v2757, v2797); real2 v2883 = reverse(minus(v2797, v2757)); scatter(out, 11, 128, plus(v2888, v2889)); real2 v2902 = minus(v2888, v2889); scatter(out, 75, 128, timesminusplus(v2902, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2902), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2887 = minusplus(uminus(v2883), v2884); scatter(out, 107, 128, timesminusplus(reverse(v2887), load(tbl, 512 * VECWIDTH + tbloffset), times(v2887, load(tbl, 513 * VECWIDTH + tbloffset)))); real2 v2885 = minusplus(v2883, v2884); scatter(out, 43, 128, timesminusplus(reverse(v2885), load(tbl, 510 * VECWIDTH + tbloffset), times(v2885, load(tbl, 511 * VECWIDTH + tbloffset)))); real2 v2669 = minusplus(uminus(v2665), v2666); real2 v2667 = minusplus(v2665, v2666); real2 v2707 = minusplus(v2705, v2706); real2 v2709 = minusplus(uminus(v2705), 
v2706); real2 v2717 = timesminusplus(reverse(v2707), load(tbl, 478 * VECWIDTH + tbloffset), times(v2707, load(tbl, 479 * VECWIDTH + tbloffset))); real2 v2627 = minusplus(v2625, v2626); real2 v2629 = minusplus(uminus(v2625), v2626); real2 v2637 = timesminusplus(reverse(v2627), load(tbl, 462 * VECWIDTH + tbloffset), times(v2627, load(tbl, 463 * VECWIDTH + tbloffset))); real2 v2961 = plus(v2637, v2717); real2 v2955 = reverse(minus(v2717, v2637)); real2 v2649 = minusplus(uminus(v2645), v2646); real2 v2647 = minusplus(v2645, v2646); real2 v2569 = minusplus(uminus(v2565), v2566); real2 v2567 = minusplus(v2565, v2566); real2 v2577 = timesminusplus(reverse(v2567), load(tbl, 450 * VECWIDTH + tbloffset), times(v2567, load(tbl, 451 * VECWIDTH + tbloffset))); real2 v2657 = timesminusplus(reverse(v2647), load(tbl, 466 * VECWIDTH + tbloffset), times(v2647, load(tbl, 467 * VECWIDTH + tbloffset))); real2 v2936 = minus(v2657, v2577); real2 v2940 = plus(v2577, v2657); real2 v2976 = minus(v2941, v2940); real2 v2980 = plus(v2940, v2941); real2 v2677 = timesminusplus(reverse(v2667), load(tbl, 470 * VECWIDTH + tbloffset), times(v2667, load(tbl, 471 * VECWIDTH + tbloffset))); real2 v2587 = minusplus(v2585, v2586); real2 v2589 = minusplus(uminus(v2585), v2586); real2 v2597 = timesminusplus(reverse(v2587), load(tbl, 454 * VECWIDTH + tbloffset), times(v2587, load(tbl, 455 * VECWIDTH + tbloffset))); real2 v2956 = minus(v2677, v2597); real2 v2960 = plus(v2597, v2677); real2 v2975 = reverse(minus(v2961, v2960)); real2 v2981 = plus(v2960, v2961); scatter(out, 7, 128, plus(v2980, v2981)); real2 v2994 = minus(v2980, v2981); scatter(out, 71, 128, timesminusplus(v2994, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2994), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2979 = minusplus(uminus(v2975), v2976); scatter(out, 103, 128, timesminusplus(reverse(v2979), load(tbl, 528 * VECWIDTH + tbloffset), times(v2979, load(tbl, 529 * VECWIDTH + tbloffset)))); real2 v2977 = minusplus(v2975, v2976); 
scatter(out, 39, 128, timesminusplus(reverse(v2977), load(tbl, 526 * VECWIDTH + tbloffset), times(v2977, load(tbl, 527 * VECWIDTH + tbloffset)))); real2 v2939 = minusplus(uminus(v2935), v2936); real2 v2937 = minusplus(v2935, v2936); real2 v2953 = timesminusplus(reverse(v2939), load(tbl, 520 * VECWIDTH + tbloffset), times(v2939, load(tbl, 521 * VECWIDTH + tbloffset))); real2 v2957 = minusplus(v2955, v2956); real2 v2959 = minusplus(uminus(v2955), v2956); real2 v2973 = timesminusplus(reverse(v2959), load(tbl, 524 * VECWIDTH + tbloffset), times(v2959, load(tbl, 525 * VECWIDTH + tbloffset))); scatter(out, 55, 128, plus(v2953, v2973)); real2 v3006 = minus(v2953, v2973); scatter(out, 119, 128, timesminusplus(v3006, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3006), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2947 = timesminusplus(reverse(v2937), load(tbl, 518 * VECWIDTH + tbloffset), times(v2937, load(tbl, 519 * VECWIDTH + tbloffset))); real2 v2967 = timesminusplus(reverse(v2957), load(tbl, 522 * VECWIDTH + tbloffset), times(v2957, load(tbl, 523 * VECWIDTH + tbloffset))); scatter(out, 23, 128, plus(v2947, v2967)); real2 v3000 = minus(v2947, v2967); scatter(out, 87, 128, timesminusplus(v3000, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3000), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2663 = timesminusplus(reverse(v2649), load(tbl, 468 * VECWIDTH + tbloffset), times(v2649, load(tbl, 469 * VECWIDTH + tbloffset))); real2 v2583 = timesminusplus(reverse(v2569), load(tbl, 452 * VECWIDTH + tbloffset), times(v2569, load(tbl, 453 * VECWIDTH + tbloffset))); real2 v3014 = minus(v2663, v2583); real2 v3018 = plus(v2583, v2663); real2 v3015 = minusplus(v3013, v3014); real2 v3017 = minusplus(uminus(v3013), v3014); real2 v2643 = timesminusplus(reverse(v2629), load(tbl, 464 * VECWIDTH + tbloffset), times(v2629, load(tbl, 465 * VECWIDTH + tbloffset))); real2 v2723 = timesminusplus(reverse(v2709), load(tbl, 480 * VECWIDTH + tbloffset), times(v2709, load(tbl, 481 * 
VECWIDTH + tbloffset))); real2 v3039 = plus(v2643, v2723); real2 v3033 = reverse(minus(v2723, v2643)); real2 v2683 = timesminusplus(reverse(v2669), load(tbl, 472 * VECWIDTH + tbloffset), times(v2669, load(tbl, 473 * VECWIDTH + tbloffset))); real2 v3031 = timesminusplus(reverse(v3017), load(tbl, 532 * VECWIDTH + tbloffset), times(v3017, load(tbl, 533 * VECWIDTH + tbloffset))); real2 v2603 = timesminusplus(reverse(v2589), load(tbl, 456 * VECWIDTH + tbloffset), times(v2589, load(tbl, 457 * VECWIDTH + tbloffset))); real2 v3034 = minus(v2683, v2603); real2 v3038 = plus(v2603, v2683); real2 v3037 = minusplus(uminus(v3033), v3034); real2 v3035 = minusplus(v3033, v3034); real2 v3051 = timesminusplus(reverse(v3037), load(tbl, 536 * VECWIDTH + tbloffset), times(v3037, load(tbl, 537 * VECWIDTH + tbloffset))); scatter(out, 63, 128, plus(v3031, v3051)); real2 v3084 = minus(v3031, v3051); scatter(out, 127, 128, timesminusplus(v3084, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3084), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v3025 = timesminusplus(reverse(v3015), load(tbl, 530 * VECWIDTH + tbloffset), times(v3015, load(tbl, 531 * VECWIDTH + tbloffset))); real2 v3045 = timesminusplus(reverse(v3035), load(tbl, 534 * VECWIDTH + tbloffset), times(v3035, load(tbl, 535 * VECWIDTH + tbloffset))); scatter(out, 31, 128, plus(v3025, v3045)); real2 v3078 = minus(v3025, v3045); scatter(out, 95, 128, timesminusplus(v3078, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3078), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v3058 = plus(v3018, v3019); real2 v3054 = minus(v3019, v3018); real2 v3053 = reverse(minus(v3039, v3038)); real2 v3059 = plus(v3038, v3039); real2 v3055 = minusplus(v3053, v3054); scatter(out, 47, 128, timesminusplus(reverse(v3055), load(tbl, 538 * VECWIDTH + tbloffset), times(v3055, load(tbl, 539 * VECWIDTH + tbloffset)))); real2 v3057 = minusplus(uminus(v3053), v3054); scatter(out, 111, 128, timesminusplus(reverse(v3057), load(tbl, 540 * VECWIDTH + 
tbloffset), times(v3057, load(tbl, 541 * VECWIDTH + tbloffset)))); scatter(out, 15, 128, plus(v3058, v3059)); real2 v3072 = minus(v3058, v3059); scatter(out, 79, 128, timesminusplus(v3072, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v3072), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v683 = timesminusplus(reverse(v673), load(tbl, 110 * VECWIDTH + tbloffset), times(v673, load(tbl, 111 * VECWIDTH + tbloffset))); real2 v363 = timesminusplus(reverse(v353), load(tbl, 46 * VECWIDTH + tbloffset), times(v353, load(tbl, 47 * VECWIDTH + tbloffset))); real2 v2105 = plus(v363, v683); real2 v2099 = reverse(minus(v683, v363)); real2 v283 = timesminusplus(reverse(v273), load(tbl, 30 * VECWIDTH + tbloffset), times(v273, load(tbl, 31 * VECWIDTH + tbloffset))); real2 v723 = timesminusplus(reverse(v713), load(tbl, 118 * VECWIDTH + tbloffset), times(v713, load(tbl, 119 * VECWIDTH + tbloffset))); real2 v403 = timesminusplus(reverse(v393), load(tbl, 54 * VECWIDTH + tbloffset), times(v393, load(tbl, 55 * VECWIDTH + tbloffset))); real2 v603 = timesminusplus(reverse(v593), load(tbl, 94 * VECWIDTH + tbloffset), times(v593, load(tbl, 95 * VECWIDTH + tbloffset))); real2 v2180 = minus(v603, v283); real2 v2184 = plus(v283, v603); real2 v2145 = plus(v403, v723); real2 v2139 = reverse(minus(v723, v403)); real2 v543 = timesminusplus(reverse(v533), load(tbl, 82 * VECWIDTH + tbloffset), times(v533, load(tbl, 83 * VECWIDTH + tbloffset))); real2 v383 = timesminusplus(reverse(v373), load(tbl, 50 * VECWIDTH + tbloffset), times(v373, load(tbl, 51 * VECWIDTH + tbloffset))); real2 v703 = timesminusplus(reverse(v693), load(tbl, 114 * VECWIDTH + tbloffset), times(v693, load(tbl, 115 * VECWIDTH + tbloffset))); real2 v2125 = plus(v383, v703); real2 v2119 = reverse(minus(v703, v383)); real2 v223 = timesminusplus(reverse(v213), load(tbl, 18 * VECWIDTH + tbloffset), times(v213, load(tbl, 19 * VECWIDTH + tbloffset))); real2 v2120 = minus(v543, v223); real2 v2124 = plus(v223, v543); real2 v443 = 
timesminusplus(reverse(v433), load(tbl, 62 * VECWIDTH + tbloffset), times(v433, load(tbl, 63 * VECWIDTH + tbloffset))); real2 v203 = timesminusplus(reverse(v193), load(tbl, 14 * VECWIDTH + tbloffset), times(v193, load(tbl, 15 * VECWIDTH + tbloffset))); real2 v763 = timesminusplus(reverse(v753), load(tbl, 126 * VECWIDTH + tbloffset), times(v753, load(tbl, 127 * VECWIDTH + tbloffset))); real2 v2179 = reverse(minus(v763, v443)); real2 v2185 = plus(v443, v763); real2 v523 = timesminusplus(reverse(v513), load(tbl, 78 * VECWIDTH + tbloffset), times(v513, load(tbl, 79 * VECWIDTH + tbloffset))); real2 v2100 = minus(v523, v203); real2 v2104 = plus(v203, v523); real2 v2264 = plus(v2104, v2105); real2 v2260 = minus(v2105, v2104); real2 v643 = timesminusplus(reverse(v633), load(tbl, 102 * VECWIDTH + tbloffset), times(v633, load(tbl, 103 * VECWIDTH + tbloffset))); real2 v2265 = plus(v2184, v2185); real2 v2259 = reverse(minus(v2185, v2184)); real2 v563 = timesminusplus(reverse(v553), load(tbl, 86 * VECWIDTH + tbloffset), times(v553, load(tbl, 87 * VECWIDTH + tbloffset))); real2 v243 = timesminusplus(reverse(v233), load(tbl, 22 * VECWIDTH + tbloffset), times(v233, load(tbl, 23 * VECWIDTH + tbloffset))); real2 v2144 = plus(v243, v563); real2 v2140 = minus(v563, v243); real2 v143 = timesminusplus(reverse(v133), load(tbl, 2 * VECWIDTH + tbloffset), times(v133, load(tbl, 3 * VECWIDTH + tbloffset))); real2 v183 = timesminusplus(reverse(v173), load(tbl, 10 * VECWIDTH + tbloffset), times(v173, load(tbl, 11 * VECWIDTH + tbloffset))); real2 v2084 = plus(v183, v503); real2 v2080 = minus(v503, v183); real2 v163 = timesminusplus(reverse(v153), load(tbl, 6 * VECWIDTH + tbloffset), times(v153, load(tbl, 7 * VECWIDTH + tbloffset))); real2 v303 = timesminusplus(reverse(v293), load(tbl, 34 * VECWIDTH + tbloffset), times(v293, load(tbl, 35 * VECWIDTH + tbloffset))); real2 v623 = timesminusplus(reverse(v613), load(tbl, 98 * VECWIDTH + tbloffset), times(v613, load(tbl, 99 * VECWIDTH + tbloffset))); 
real2 v2039 = reverse(minus(v623, v303)); real2 v2045 = plus(v303, v623); real2 v463 = timesminusplus(reverse(v453), load(tbl, 66 * VECWIDTH + tbloffset), times(v453, load(tbl, 67 * VECWIDTH + tbloffset))); real2 v2044 = plus(v143, v463); real2 v2040 = minus(v463, v143); real2 v2204 = plus(v2044, v2045); real2 v2200 = minus(v2045, v2044); real2 v323 = timesminusplus(reverse(v313), load(tbl, 38 * VECWIDTH + tbloffset), times(v313, load(tbl, 39 * VECWIDTH + tbloffset))); real2 v2205 = plus(v2124, v2125); real2 v2199 = reverse(minus(v2125, v2124)); real2 v2280 = minus(v2205, v2204); real2 v2284 = plus(v2204, v2205); real2 v2225 = plus(v2144, v2145); real2 v2219 = reverse(minus(v2145, v2144)); real2 v2305 = plus(v2264, v2265); real2 v2299 = reverse(minus(v2265, v2264)); real2 v2240 = minus(v2085, v2084); real2 v2244 = plus(v2084, v2085); real2 v2279 = reverse(minus(v2245, v2244)); real2 v2285 = plus(v2244, v2245); real2 v2281 = minusplus(v2279, v2280); real2 v2283 = minusplus(uminus(v2279), v2280); real2 v2291 = timesminusplus(reverse(v2281), load(tbl, 406 * VECWIDTH + tbloffset), times(v2281, load(tbl, 407 * VECWIDTH + tbloffset))); real2 v483 = timesminusplus(reverse(v473), load(tbl, 70 * VECWIDTH + tbloffset), times(v473, load(tbl, 71 * VECWIDTH + tbloffset))); real2 v2060 = minus(v483, v163); real2 v2064 = plus(v163, v483); real2 v2065 = plus(v323, v643); real2 v2059 = reverse(minus(v643, v323)); real2 v2220 = minus(v2065, v2064); real2 v2224 = plus(v2064, v2065); real2 v2304 = plus(v2224, v2225); real2 v2300 = minus(v2225, v2224); real2 v2301 = minusplus(v2299, v2300); real2 v2303 = minusplus(uminus(v2299), v2300); real2 v2311 = timesminusplus(reverse(v2301), load(tbl, 410 * VECWIDTH + tbloffset), times(v2301, load(tbl, 411 * VECWIDTH + tbloffset))); scatter(out, 17, 128, plus(v2291, v2311)); real2 v2344 = minus(v2291, v2311); scatter(out, 81, 128, timesminusplus(v2344, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2344), load(tbl, 1 * VECWIDTH + 
tbloffset)))); real2 v2297 = timesminusplus(reverse(v2283), load(tbl, 408 * VECWIDTH + tbloffset), times(v2283, load(tbl, 409 * VECWIDTH + tbloffset))); real2 v2317 = timesminusplus(reverse(v2303), load(tbl, 412 * VECWIDTH + tbloffset), times(v2303, load(tbl, 413 * VECWIDTH + tbloffset))); scatter(out, 49, 128, plus(v2297, v2317)); real2 v2350 = minus(v2297, v2317); scatter(out, 113, 128, timesminusplus(v2350, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2350), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2320 = minus(v2285, v2284); real2 v2324 = plus(v2284, v2285); real2 v2325 = plus(v2304, v2305); real2 v2319 = reverse(minus(v2305, v2304)); scatter(out, 1, 128, plus(v2324, v2325)); real2 v2338 = minus(v2324, v2325); scatter(out, 65, 128, timesminusplus(v2338, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2338), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2321 = minusplus(v2319, v2320); scatter(out, 33, 128, timesminusplus(reverse(v2321), load(tbl, 414 * VECWIDTH + tbloffset), times(v2321, load(tbl, 415 * VECWIDTH + tbloffset)))); real2 v2323 = minusplus(uminus(v2319), v2320); scatter(out, 97, 128, timesminusplus(reverse(v2323), load(tbl, 416 * VECWIDTH + tbloffset), times(v2323, load(tbl, 417 * VECWIDTH + tbloffset)))); real2 v2201 = minusplus(v2199, v2200); real2 v2203 = minusplus(uminus(v2199), v2200); real2 v2263 = minusplus(uminus(v2259), v2260); real2 v2261 = minusplus(v2259, v2260); real2 v2243 = minusplus(uminus(v2239), v2240); real2 v2241 = minusplus(v2239, v2240); real2 v2257 = timesminusplus(reverse(v2243), load(tbl, 400 * VECWIDTH + tbloffset), times(v2243, load(tbl, 401 * VECWIDTH + tbloffset))); real2 v2217 = timesminusplus(reverse(v2203), load(tbl, 392 * VECWIDTH + tbloffset), times(v2203, load(tbl, 393 * VECWIDTH + tbloffset))); real2 v2388 = plus(v2217, v2257); real2 v2384 = minus(v2257, v2217); real2 v2277 = timesminusplus(reverse(v2263), load(tbl, 404 * VECWIDTH + tbloffset), times(v2263, load(tbl, 405 * VECWIDTH + 
tbloffset))); real2 v2221 = minusplus(v2219, v2220); real2 v2223 = minusplus(uminus(v2219), v2220); real2 v2237 = timesminusplus(reverse(v2223), load(tbl, 396 * VECWIDTH + tbloffset), times(v2223, load(tbl, 397 * VECWIDTH + tbloffset))); real2 v2389 = plus(v2237, v2277); real2 v2383 = reverse(minus(v2277, v2237)); scatter(out, 25, 128, plus(v2388, v2389)); real2 v2402 = minus(v2388, v2389); scatter(out, 89, 128, timesminusplus(v2402, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2402), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2385 = minusplus(v2383, v2384); real2 v2387 = minusplus(uminus(v2383), v2384); scatter(out, 121, 128, timesminusplus(reverse(v2387), load(tbl, 424 * VECWIDTH + tbloffset), times(v2387, load(tbl, 425 * VECWIDTH + tbloffset)))); scatter(out, 57, 128, timesminusplus(reverse(v2385), load(tbl, 422 * VECWIDTH + tbloffset), times(v2385, load(tbl, 423 * VECWIDTH + tbloffset)))); real2 v2251 = timesminusplus(reverse(v2241), load(tbl, 398 * VECWIDTH + tbloffset), times(v2241, load(tbl, 399 * VECWIDTH + tbloffset))); real2 v2211 = timesminusplus(reverse(v2201), load(tbl, 390 * VECWIDTH + tbloffset), times(v2201, load(tbl, 391 * VECWIDTH + tbloffset))); real2 v2358 = minus(v2251, v2211); real2 v2362 = plus(v2211, v2251); real2 v2271 = timesminusplus(reverse(v2261), load(tbl, 402 * VECWIDTH + tbloffset), times(v2261, load(tbl, 403 * VECWIDTH + tbloffset))); real2 v2231 = timesminusplus(reverse(v2221), load(tbl, 394 * VECWIDTH + tbloffset), times(v2221, load(tbl, 395 * VECWIDTH + tbloffset))); real2 v2357 = reverse(minus(v2271, v2231)); real2 v2363 = plus(v2231, v2271); scatter(out, 9, 128, plus(v2362, v2363)); real2 v2376 = minus(v2362, v2363); scatter(out, 73, 128, timesminusplus(v2376, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2376), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2361 = minusplus(uminus(v2357), v2358); scatter(out, 105, 128, timesminusplus(reverse(v2361), load(tbl, 420 * VECWIDTH + tbloffset), times(v2361, load(tbl, 
421 * VECWIDTH + tbloffset)))); real2 v2359 = minusplus(v2357, v2358); scatter(out, 41, 128, timesminusplus(reverse(v2359), load(tbl, 418 * VECWIDTH + tbloffset), times(v2359, load(tbl, 419 * VECWIDTH + tbloffset)))); real2 v2121 = minusplus(v2119, v2120); real2 v2123 = minusplus(uminus(v2119), v2120); real2 v2083 = minusplus(uminus(v2079), v2080); real2 v2081 = minusplus(v2079, v2080); real2 v2091 = timesminusplus(reverse(v2081), load(tbl, 366 * VECWIDTH + tbloffset), times(v2081, load(tbl, 367 * VECWIDTH + tbloffset))); real2 v2043 = minusplus(uminus(v2039), v2040); real2 v2041 = minusplus(v2039, v2040); real2 v2051 = timesminusplus(reverse(v2041), load(tbl, 358 * VECWIDTH + tbloffset), times(v2041, load(tbl, 359 * VECWIDTH + tbloffset))); real2 v2131 = timesminusplus(reverse(v2121), load(tbl, 374 * VECWIDTH + tbloffset), times(v2121, load(tbl, 375 * VECWIDTH + tbloffset))); real2 v2163 = minusplus(uminus(v2159), v2160); real2 v2161 = minusplus(v2159, v2160); real2 v2171 = timesminusplus(reverse(v2161), load(tbl, 382 * VECWIDTH + tbloffset), times(v2161, load(tbl, 383 * VECWIDTH + tbloffset))); real2 v2409 = reverse(minus(v2171, v2091)); real2 v2415 = plus(v2091, v2171); real2 v2410 = minus(v2131, v2051); real2 v2414 = plus(v2051, v2131); real2 v2454 = plus(v2414, v2415); real2 v2450 = minus(v2415, v2414); real2 v2181 = minusplus(v2179, v2180); real2 v2183 = minusplus(uminus(v2179), v2180); real2 v2191 = timesminusplus(reverse(v2181), load(tbl, 386 * VECWIDTH + tbloffset), times(v2181, load(tbl, 387 * VECWIDTH + tbloffset))); real2 v2103 = minusplus(uminus(v2099), v2100); real2 v2101 = minusplus(v2099, v2100); real2 v2111 = timesminusplus(reverse(v2101), load(tbl, 370 * VECWIDTH + tbloffset), times(v2101, load(tbl, 371 * VECWIDTH + tbloffset))); real2 v2435 = plus(v2111, v2191); real2 v2429 = reverse(minus(v2191, v2111)); real2 v2141 = minusplus(v2139, v2140); real2 v2143 = minusplus(uminus(v2139), v2140); real2 v2151 = timesminusplus(reverse(v2141), load(tbl, 
378 * VECWIDTH + tbloffset), times(v2141, load(tbl, 379 * VECWIDTH + tbloffset))); real2 v2063 = minusplus(uminus(v2059), v2060); real2 v2061 = minusplus(v2059, v2060); real2 v2071 = timesminusplus(reverse(v2061), load(tbl, 362 * VECWIDTH + tbloffset), times(v2061, load(tbl, 363 * VECWIDTH + tbloffset))); real2 v2434 = plus(v2071, v2151); real2 v2430 = minus(v2151, v2071); real2 v2455 = plus(v2434, v2435); real2 v2449 = reverse(minus(v2435, v2434)); scatter(out, 5, 128, plus(v2454, v2455)); real2 v2468 = minus(v2454, v2455); scatter(out, 69, 128, timesminusplus(v2468, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2468), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2451 = minusplus(v2449, v2450); real2 v2453 = minusplus(uminus(v2449), v2450); scatter(out, 101, 128, timesminusplus(reverse(v2453), load(tbl, 436 * VECWIDTH + tbloffset), times(v2453, load(tbl, 437 * VECWIDTH + tbloffset)))); scatter(out, 37, 128, timesminusplus(reverse(v2451), load(tbl, 434 * VECWIDTH + tbloffset), times(v2451, load(tbl, 435 * VECWIDTH + tbloffset)))); real2 v2411 = minusplus(v2409, v2410); real2 v2413 = minusplus(uminus(v2409), v2410); real2 v2433 = minusplus(uminus(v2429), v2430); real2 v2431 = minusplus(v2429, v2430); real2 v2421 = timesminusplus(reverse(v2411), load(tbl, 426 * VECWIDTH + tbloffset), times(v2411, load(tbl, 427 * VECWIDTH + tbloffset))); real2 v2441 = timesminusplus(reverse(v2431), load(tbl, 430 * VECWIDTH + tbloffset), times(v2431, load(tbl, 431 * VECWIDTH + tbloffset))); scatter(out, 21, 128, plus(v2421, v2441)); real2 v2474 = minus(v2421, v2441); scatter(out, 85, 128, timesminusplus(v2474, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2474), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2427 = timesminusplus(reverse(v2413), load(tbl, 428 * VECWIDTH + tbloffset), times(v2413, load(tbl, 429 * VECWIDTH + tbloffset))); real2 v2447 = timesminusplus(reverse(v2433), load(tbl, 432 * VECWIDTH + tbloffset), times(v2433, load(tbl, 433 * VECWIDTH + tbloffset))); 
scatter(out, 53, 128, plus(v2427, v2447)); real2 v2480 = minus(v2427, v2447); scatter(out, 117, 128, timesminusplus(v2480, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2480), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2057 = timesminusplus(reverse(v2043), load(tbl, 360 * VECWIDTH + tbloffset), times(v2043, load(tbl, 361 * VECWIDTH + tbloffset))); real2 v2097 = timesminusplus(reverse(v2083), load(tbl, 368 * VECWIDTH + tbloffset), times(v2083, load(tbl, 369 * VECWIDTH + tbloffset))); real2 v2157 = timesminusplus(reverse(v2143), load(tbl, 380 * VECWIDTH + tbloffset), times(v2143, load(tbl, 381 * VECWIDTH + tbloffset))); real2 v2197 = timesminusplus(reverse(v2183), load(tbl, 388 * VECWIDTH + tbloffset), times(v2183, load(tbl, 389 * VECWIDTH + tbloffset))); real2 v2117 = timesminusplus(reverse(v2103), load(tbl, 372 * VECWIDTH + tbloffset), times(v2103, load(tbl, 373 * VECWIDTH + tbloffset))); real2 v2507 = reverse(minus(v2197, v2117)); real2 v2513 = plus(v2117, v2197); real2 v2137 = timesminusplus(reverse(v2123), load(tbl, 376 * VECWIDTH + tbloffset), times(v2123, load(tbl, 377 * VECWIDTH + tbloffset))); real2 v2488 = minus(v2137, v2057); real2 v2492 = plus(v2057, v2137); real2 v2177 = timesminusplus(reverse(v2163), load(tbl, 384 * VECWIDTH + tbloffset), times(v2163, load(tbl, 385 * VECWIDTH + tbloffset))); real2 v2493 = plus(v2097, v2177); real2 v2487 = reverse(minus(v2177, v2097)); real2 v2532 = plus(v2492, v2493); real2 v2528 = minus(v2493, v2492); real2 v2077 = timesminusplus(reverse(v2063), load(tbl, 364 * VECWIDTH + tbloffset), times(v2063, load(tbl, 365 * VECWIDTH + tbloffset))); real2 v2512 = plus(v2077, v2157); real2 v2508 = minus(v2157, v2077); real2 v2527 = reverse(minus(v2513, v2512)); real2 v2533 = plus(v2512, v2513); real2 v2529 = minusplus(v2527, v2528); real2 v2531 = minusplus(uminus(v2527), v2528); scatter(out, 109, 128, timesminusplus(reverse(v2531), load(tbl, 448 * VECWIDTH + tbloffset), times(v2531, load(tbl, 449 * VECWIDTH + 
tbloffset)))); scatter(out, 45, 128, timesminusplus(reverse(v2529), load(tbl, 446 * VECWIDTH + tbloffset), times(v2529, load(tbl, 447 * VECWIDTH + tbloffset)))); scatter(out, 13, 128, plus(v2532, v2533)); real2 v2546 = minus(v2532, v2533); scatter(out, 77, 128, timesminusplus(v2546, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2546), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2509 = minusplus(v2507, v2508); real2 v2511 = minusplus(uminus(v2507), v2508); real2 v2491 = minusplus(uminus(v2487), v2488); real2 v2489 = minusplus(v2487, v2488); real2 v2499 = timesminusplus(reverse(v2489), load(tbl, 438 * VECWIDTH + tbloffset), times(v2489, load(tbl, 439 * VECWIDTH + tbloffset))); real2 v2519 = timesminusplus(reverse(v2509), load(tbl, 442 * VECWIDTH + tbloffset), times(v2509, load(tbl, 443 * VECWIDTH + tbloffset))); scatter(out, 29, 128, plus(v2499, v2519)); real2 v2552 = minus(v2499, v2519); scatter(out, 93, 128, timesminusplus(v2552, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2552), load(tbl, 1 * VECWIDTH + tbloffset)))); real2 v2505 = timesminusplus(reverse(v2491), load(tbl, 440 * VECWIDTH + tbloffset), times(v2491, load(tbl, 441 * VECWIDTH + tbloffset))); real2 v2525 = timesminusplus(reverse(v2511), load(tbl, 444 * VECWIDTH + tbloffset), times(v2511, load(tbl, 445 * VECWIDTH + tbloffset))); scatter(out, 61, 128, plus(v2505, v2525)); real2 v2558 = minus(v2505, v2525); scatter(out, 125, 128, timesminusplus(v2558, load(tbl, 0 * VECWIDTH + tbloffset), times(reverse(v2558), load(tbl, 1 * VECWIDTH + tbloffset)))); // Pres : 76263 } } #endif // #undef EMITREALSUB #ifdef EMITREALSUB ALIGNED(8192) void realSub0_%ISA%(real *d, const real *s, const int log2len, const real *rtCoef0, const real *rtCoef1) { const int n = 1 << log2len; real s0 = s[0], s1 = s[1]; int k=1; d[n+0] = s[n+0]; d[n+1] = s[n+1]; for(;;k+=VECWIDTH) { int idx0 = k, idx1 = n-VECWIDTH+1-k; if (idx0 + VECWIDTH >= idx1) break; real2 v = loadu(s, idx0); real2 u = reverse2(load (s, idx1)); real2 
t = minusplus(v, u); real2 m = minusplus(reverse(times(t, loadu(rtCoef1, k))), times(t, loadu(rtCoef0, k))); storeu(d, idx0, minusplus(v, uminus(m))); store (d, idx1, reverse2(minus(u, m))); } for(;k= idx1) break; real2 v = loadu(s, idx0); real2 u = reverse2(load (s, idx1)); real2 t = minusplus(v, u); real2 m = minusplus(reverse(times(t, loadu(rtCoef1, k))), times(t, loadu(rtCoef0, k))); storeu(d, idx0, ctimes(uplusminus(minus(u, m)), c)); store (d, idx1, ctimes(reverse2(minusplus(m, uminus(v))), c)); } for(;k #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include "sleef.h" #ifdef ENABLE_SSE2 #include "helpersse2.h" #endif #ifdef ENABLE_AVX #include "helperavx.h" #endif #ifdef ENABLE_AVX2 #include "helperavx2.h" #endif #ifdef ENABLE_AVX512F #include "helperavx512f.h" #endif #ifdef ENABLE_NEON32 #include "helperneon32.h" #endif #ifdef ENABLE_ADVSIMD #include "helperadvsimd.h" #endif #ifdef ENABLE_SVE #include "helpersve.h" #endif #ifdef ENABLE_VSX #include "helperpower_128.h" #endif #ifdef ENABLE_VECEXT #include "helpervecext.h" #endif #ifdef ENABLE_PUREC #include "helperpurec.h" #endif #define IMPORT_IS_EXPORT #include "sleefdft.h" #if BASETYPEID == 1 #define LOG2VECWIDTH (LOG2VECTLENDP-1) #define VECWIDTH (1 << LOG2VECWIDTH) typedef double real; typedef vdouble real2; static int available(int name) { return vavailability_i(name); } static INLINE real2 uminus(real2 d0) { return vneg_vd_vd(d0); } static INLINE real2 uplusminus(real2 d0) { return vposneg_vd_vd(d0); } static INLINE real2 uminusplus(real2 d0) { return vnegpos_vd_vd(d0); } static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vd_vd_vd(d0, d1); } static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vd_vd_vd(d0, d1); } static INLINE real2 minusplus(real2 d0, real2 d1) { return vsubadd_vd_vd_vd(d0, d1); } static INLINE real2 times(real2 d0, real2 d1) { return vmul_vd_vd_vd(d0, d1); } static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return 
vmlsubadd_vd_vd_vd_vd(d0, d2, d1); } static INLINE real2 ctimes(real2 d0, real d) { return vmul_vd_vd_vd(d0, vcast_vd_d(d)); } static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vd_vd_vd_vd(d0, vcast_vd_d(c), d1); } static INLINE real2 reverse(real2 d0) { return vrev21_vd_vd(d0); } static INLINE real2 reverse2(real2 d0) { return vreva2_vd_vd(d0); } static INLINE real2 loadc(real c) { return vcast_vd_d(c); } static INLINE real2 load(const real *ptr, int offset) { return vload_vd_p(&ptr[2*offset]); } static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vd_p(&ptr[2*offset]); } static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vd(&ptr[2*offset], v); } static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vd(&ptr[2*offset], v); } static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vd(&ptr[2*offset], v); } static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vd(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vd(ptr, offset, step, v); } static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #elif BASETYPEID == 2 #define LOG2VECWIDTH (LOG2VECTLENSP-1) #define VECWIDTH (1 << LOG2VECWIDTH) typedef float real; typedef vfloat real2; static int available(int name) { return vavailability_i(name); } static INLINE real2 uminus(real2 d0) { return vneg_vf_vf(d0); } static INLINE real2 uplusminus(real2 d0) { return vposneg_vf_vf(d0); } static INLINE real2 uminusplus(real2 d0) { return vnegpos_vf_vf(d0); } static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vf_vf_vf(d0, d1); } static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vf_vf_vf(d0, d1); } static INLINE real2 minusplus(real2 d0, real2 d1) { return vsubadd_vf_vf_vf(d0, d1); } static INLINE real2 times(real2 d0, real2 d1) { return vmul_vf_vf_vf(d0, d1); } static INLINE real2 
ctimes(real2 d0, real d) { return vmul_vf_vf_vf(d0, vcast_vf_f(d)); } static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return vmlsubadd_vf_vf_vf_vf(d0, d2, d1); } static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vf_vf_vf_vf(d0, vcast_vf_f(c), d1); } static INLINE real2 reverse(real2 d0) { return vrev21_vf_vf(d0); } static INLINE real2 reverse2(real2 d0) { return vreva2_vf_vf(d0); } static INLINE real2 loadc(real c) { return vcast_vf_f(c); } static INLINE real2 load(const real *ptr, int offset) { return vload_vf_p(&ptr[2*offset]); } static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vf_p(&ptr[2*offset]); } static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vf(&ptr[2*offset], v); } static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vf(&ptr[2*offset], v); } static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vf(&ptr[2*offset], v); } static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vf(ptr, offset, step, v); } static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #elif BASETYPEID == 3 #define LOG2VECWIDTH (LOG2VECTLENDP-1) #define VECWIDTH (1 << LOG2VECWIDTH) typedef long double real; typedef vlongdouble real2; static int available(int name) { return vavailability_i(name); } static INLINE real2 uminus(real2 d0) { return vneg_vl_vl(d0); } static INLINE real2 uplusminus(real2 d0) { return vposneg_vl_vl(d0); } static INLINE real2 uminusplus(real2 d0) { return vnegpos_vl_vl(d0); } static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vl_vl_vl(d0, d1); } static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vl_vl_vl(d0, d1); } static INLINE real2 minusplus(real2 d0, real2 d1) { return vsubadd_vl_vl_vl(d0, d1); } static INLINE real2 times(real2 d0, 
real2 d1) { return vmul_vl_vl_vl(d0, d1); } static INLINE real2 ctimes(real2 d0, real d) { return vmul_vl_vl_vl(d0, vcast_vl_l(d)); } static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return vmlsubadd_vl_vl_vl_vl(d0, d2, d1); } static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vl_vl_vl_vl(d0, vcast_vl_l(c), d1); } static INLINE real2 reverse(real2 d0) { return vrev21_vl_vl(d0); } static INLINE real2 reverse2(real2 d0) { return vreva2_vl_vl(d0); } static INLINE real2 loadc(real c) { return vcast_vl_l(c); } static INLINE real2 load(const real *ptr, int offset) { return vload_vl_p(&ptr[2*offset]); } static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vl_p(&ptr[2*offset]); } static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vl(&ptr[2*offset], v); } static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vl(&ptr[2*offset], v); } static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vl(&ptr[2*offset], v); } static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vl(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vl(ptr, offset, step, v); } static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #elif BASETYPEID == 4 #define LOG2VECWIDTH (LOG2VECTLENDP-1) #define VECWIDTH (1 << LOG2VECWIDTH) typedef Sleef_quad real; typedef vquad real2; static int available(int name) { return vavailability_i(name); } static INLINE real2 uminus(real2 d0) { return vneg_vq_vq(d0); } static INLINE real2 uplusminus(real2 d0) { return vposneg_vq_vq(d0); } static INLINE real2 uminusplus(real2 d0) { return vnegpos_vq_vq(d0); } static INLINE real2 plus(real2 d0, real2 d1) { return vadd_vq_vq_vq(d0, d1); } static INLINE real2 minus(real2 d0, real2 d1) { return vsub_vq_vq_vq(d0, d1); } static INLINE real2 minusplus(real2 d0, real2 d1) { return 
vsubadd_vq_vq_vq(d0, d1); } static INLINE real2 times(real2 d0, real2 d1) { return vmul_vq_vq_vq(d0, d1); } static INLINE real2 ctimes(real2 d0, real d) { return vmul_vq_vq_vq(d0, vcast_vq_q(d)); } static INLINE real2 timesminusplus(real2 d0, real2 d2, real2 d1) { return vmlsubadd_vq_vq_vq_vq(d0, d2, d1); } static INLINE real2 ctimesminusplus(real2 d0, real c, real2 d1) { return vmlsubadd_vq_vq_vq_vq(d0, vcast_vq_q(c), d1); } static INLINE real2 reverse(real2 d0) { return vrev21_vq_vq(d0); } static INLINE real2 reverse2(real2 d0) { return vreva2_vq_vq(d0); } static INLINE real2 loadc(real c) { return vcast_vq_q(c); } static INLINE real2 load(const real *ptr, int offset) { return vload_vq_p(&ptr[2*offset]); } static INLINE real2 loadu(const real *ptr, int offset) { return vloadu_vq_p(&ptr[2*offset]); } static INLINE void store(real *ptr, int offset, real2 v) { vstore_v_p_vq(&ptr[2*offset], v); } static INLINE void storeu(real *ptr, int offset, real2 v) { vstoreu_v_p_vq(&ptr[2*offset], v); } static INLINE void stream(real *ptr, int offset, real2 v) { vstream_v_p_vq(&ptr[2*offset], v); } static INLINE void scatter(real *ptr, int offset, int step, real2 v) { vscatter2_v_p_i_i_vq(ptr, offset, step, v); } static INLINE void scstream(real *ptr, int offset, int step, real2 v) { vsscatter2_v_p_i_i_vq(ptr, offset, step, v); } static INLINE void prefetch(real *ptr, int offset) { vprefetch_v_p(&ptr[2*offset]); } #else #error No BASETYPEID specified #endif #endif sleef-3.3.1/src/gencoef/000077500000000000000000000000001333715643700150255ustar00rootroot00000000000000sleef-3.3.1/src/gencoef/Makefile000066400000000000000000000003531333715643700164660ustar00rootroot00000000000000.PHONY: all all : gencoef gencoef : gencoef.c simplexfr.c sp.h dp.h ld.h qp.h gcc -O gencoef.c simplexfr.c -o gencoef -lmpfr -lm .PHONY: clean clean : rm -f gencoef gencoefdp gencoefld a.out *~ rm -f *.obj *.lib *.dll *.exp *.exe 
sleef-3.3.1/src/gencoef/dp.h000066400000000000000000000103411333715643700156000ustar00rootroot00000000000000// This is part of SLEEF, written by Naoki // Shibata. http://shibatch.sourceforge.net // The code in this file is distributed under the Creative Commons // Attribution 4.0 International License. #define PREC_TARGET 53 #if 0 #define N 8 // Degree of equation #define S 40 // Number of samples for phase 1 #define L 4 // Number of high precision coefficients #define MIN 0.0 // Min argument #define MAX (M_PI/4) // Max argument #define PMUL 2 // The form of polynomial is y = x^(PADD+PMUL*0) + x^(PADD+PMUL*1) + ... #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_sin(ret, a, GMP_RNDN); } // The function to approximate void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 // Fix coef 0 to 1.0 #endif #if 0 #define N 10 #define S 40 #define L 2 #define MIN 0.0 #define MAX (M_PI/4) void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x; mpfr_init(x); mpfr_cos(ret, a, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clear(x); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #define FIXCOEF0 (-0.5) #endif #if 0 // for xsincospi4_u05 #define S 40 #define N 8 #define L 2 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 // for xsincospi4_u05 #define N 8 #define S 40 #define L 2 #define MIN 0.0 #define MAX 1.0 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_cos(ret, x, 
GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #endif #if 0 // for xsincospi4 #define N 7 #define S 40 #define L 0 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 #define N 17 #define S 60 #define L 0 #define MIN 0.0 #define MAX (M_PI/4) #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_tan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif #if 0 #define N 11 #define S 35 #define L 2 #define MIN 1 //0.75 #define MAX 1.5 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_log(ret, a, GMP_RNDN); } void CFUNC(mpfr_t frd, mpfr_t fra) { mpfr_t tmp, one; mpfr_inits(tmp, one, NULL); mpfr_set_d(one, 1, GMP_RNDN); mpfr_add(tmp, fra, one, GMP_RNDN); mpfr_sub(frd, fra, one, GMP_RNDN); mpfr_div(frd, frd, tmp, GMP_RNDN); mpfr_clears(tmp, one, NULL); } #define FIXCOEF0 2.0 #endif #if 1 #define N 12 #define S 50 #define L 2 #define MIN -0.347 #define MAX 0.347 // 0.5 log 2 #define PMUL 1 #define PADD 0 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_exp(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #define FIXCOEF1 1.0 //#define FIXCOEF2 0.5 #endif #if 0 #define N 21 #define S 100 #define L 1 #define P 1.1 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_atan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif #if 0 
#define N 20 #define S 100 #define L 0 #define P 1.54 #define MIN 0.0 #define MAX 0.708 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_asin(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif sleef-3.3.1/src/gencoef/gencoef.c000066400000000000000000000176721333715643700166140ustar00rootroot00000000000000// This is part of SLEEF, written by Naoki Shibata. http://shibatch.sourceforge.net // Since the original code for simplex algorithm is developed by Haruhiko Okumura and // the code is distributed under the Creative Commons Attribution 4.0 International License, // the contents under this directory are also distributed under the same license. #include #include #include #include #include #include #include #include //#include "sp.h" #include "dp.h" //#include "ld.h" //#include "qp.h" #undef VERBOSE #define PREC 4096 #define EPS 1e-50 #define PREC2 (PREC_TARGET*4) #ifndef P #define P 1 #endif #ifndef Q #define Q 10000 #endif void mpfr_zinit(mpfr_t m); void regressMinRelError_fr(int n, int m, mpfr_t **x, mpfr_t *result); /* Render m as a freshly malloc'ed string of the form <sign>0.<decimal digits>e<signed exponent>; the caller releases it with free(). */ char *mpfrToStr(mpfr_t m) { mpfr_t fra; mpfr_init2(fra, mpfr_get_prec(m)); mpfr_abs(fra, m, GMP_RNDN); mpfr_exp_t e; char *s = mpfr_get_str(NULL, &e, 10, 0, fra, GMP_RNDN); char *ret = malloc(strlen(s) + 20); if (mpfr_sgn(m) == -1) ret[0] = '-'; else ret[0] = '+'; ret[1] = '0'; ret[2] = '.'; strcpy(&ret[3], s); mpfr_free_str(s); /* 'e', a sign, up to 10 digits and the terminator need 13 bytes, so size the buffer for that and bound the write; the 20 spare bytes in ret still cover prefix plus suffix */ char estr[16]; snprintf(estr, sizeof(estr), "e%+d", (int)e); strcat(ret, estr); mpfr_clears(fra, NULL); return ret; } /* Absolute error of d against the reference c, expressed in units of 2^(e-PREC_TARGET) where e is the binary exponent of c; yields 10000 when c rounds to 0.0 as a double while d is non-zero. */ double countULP(mpfr_t d, mpfr_t c) { double c2 = mpfr_get_d(c, GMP_RNDN); /* take the early exit before the temporaries are initialised so that path no longer leaks them */ if (c2 == 0 && mpfr_cmp_d(d, 0) != 0) return 10000; mpfr_t fry, frw; mpfr_inits(fry, frw, NULL); long e; mpfr_get_d_2exp(&e, c, GMP_RNDN); mpfr_set_ui_2exp(frw, 1, e-PREC_TARGET, GMP_RNDN); mpfr_sub(fry, d, c, GMP_RNDN); mpfr_div(fry, fry, frw, GMP_RNDN); double u = fabs(mpfr_get_d(fry, GMP_RNDN)); mpfr_clears(fry, frw, NULL); return u; } void func(mpfr_t s, mpfr_t
x, mpfr_t *coef, int n) { mpfr_set_prec(s, PREC_TARGET); mpfr_set(s, coef[n-1], GMP_RNDN); for(int i=n-1;i>0;i--) { if (i == L-1) { mpfr_t t; mpfr_init2(t, PREC2); mpfr_set(t, s, GMP_RNDN); mpfr_set_prec(s, PREC2); mpfr_set(s, t, GMP_RNDN); mpfr_clear(t); } mpfr_mul(s, s, x, GMP_RNDN); mpfr_add(s, s, coef[i-1], GMP_RNDN); } } int main(int argc, char **argv) { int i, j; int n, m; double p; mpfr_set_default_prec(PREC); #if 0 { mpfr_t a, b; mpfr_inits(a, b, NULL); float x = M_PI; mpfr_set_d(a, x, GMP_RNDN); x = nexttowardf(x, 100); x = nexttowardf(x, 100); x = nexttowardf(x, 100); mpfr_set_d(b, x, GMP_RNDN); printf("%g\n", countULP(b, a)); exit(0); } #endif #if 0 { mpfr_t a, b; mpfr_inits(a, b, NULL); double x = M_PI; mpfr_set_d(a, x, GMP_RNDN); x = nexttoward(x, 100); x = nexttoward(x, 100); x = nexttoward(x, 100); mpfr_set_d(b, x, GMP_RNDN); printf("%g\n", countULP(b, a)); exit(0); } #endif #if 0 { mpfr_t a, b; mpfr_inits(a, b, NULL); long double x = M_PI; mpfr_set_ld(a, x, GMP_RNDN); x = nexttowardl(x, 100); x = nexttowardl(x, 100); x = nexttowardl(x, 100); mpfr_set_ld(b, x, GMP_RNDN); printf("%g\n", countULP(b, a)); exit(0); } #endif #if 0 { mpfr_t a, b; mpfr_inits(a, b, NULL); __float128 x = M_PI; mpfr_set_f128(a, x, GMP_RNDN); x = nextafterq(x, 100); x = nextafterq(x, 100); x = nextafterq(x, 100); mpfr_set_f128(b, x, GMP_RNDN); printf("%g\n", countULP(b, a)); exit(0); } #endif m = N+1; n = argc >= 2 ? atoi(argv[1]) : S; p = argc >= 3 ? 
atof(argv[2]) : P; mpfr_t **x, *result; // x[m][n], result[m] x = calloc(sizeof(mpfr_t *), m); result = calloc(sizeof(mpfr_t), m); for(i=0;i=0;i--) { mpfr_set_prec(fra, PREC_TARGET+4); mpfr_set(fra, result[i], GMP_RNDN); char *s; printf("%s, \n", s = mpfrToStr(fra)); free(s); } printf("\n"); mpfr_set_prec(fra, PREC); double emax = 0; for(i=0;i<=n*10;i++) { double a = i * (double)(MAX - MIN) / (n*10.0) + MIN; mpfr_set_d(fra, a, GMP_RNDN); CFUNC(frd, fra); mpfr_set_d(frb, 0, GMP_RNDN); for(j=m-1;j>=0;j--) { mpfr_set_d(frc, (double)j*PMUL+PADD, GMP_RNDN); mpfr_pow(frc, frd, frc, GMP_RNDN); mpfr_mul(frc, frc, result[j], GMP_RNDN); mpfr_add(frb, frb, frc, GMP_RNDN); } TARGET(frc, fra); double u = countULP(frb, frc); if (u > emax) emax = u; } printf("Phase 1 : Max error = %g ULP\n\n", emax); fflush(stdout); // mpfr_t bestcoef[N], curcoef[N]; for(i=0;i= L ? PREC_TARGET : PREC2); mpfr_set(bestcoef[i], result[i], GMP_RNDN); mpfr_init2(curcoef[i], i >= L ? PREC_TARGET : PREC2); mpfr_set(curcoef[i], result[i], GMP_RNDN); } srandom(time(NULL)); mpfr_set_default_prec(PREC2); static mpfr_t a[Q], v[Q], am[Q], aa[Q]; for(i=0;i=0;j--) { mpfr_set_d(frc, (double)j*PMUL+PADD, GMP_RNDN); mpfr_pow(frc, a[i], frc, GMP_RNDN); mpfr_mul(frc, frc, curcoef[j], GMP_RNDN); mpfr_add(frb, frb, frc, GMP_RNDN); } double e = countULP(frb, v[i]); //printf("c = %.20g, t = %.20g, ulp = %g\n", mpfr_get_d(v[i], GMP_RNDN), mpfr_get_d(frb, GMP_RNDN), e); if (!isfinite(e)) continue; if (e > emax) { emax = e; worstx = mpfr_get_d(a[i], GMP_RNDN); } esum += e; } mpfr_set_prec(frb, PREC); //printf("emax = %g\n", emax); if (emax < best || (emax == best && esum < bestsum)) { for(i=0;i 10) printf("Max error = %g ULP, Sum error = %g (Max error at %g)\n", emax, esum, worstx); if ((best - emax) / best > 0.0001) k = 0; best = emax; bestsum = esum; bestworstx = worstx; } for(i=0;i 0) { for(int j=0;jr;j--) mpfr_nextbelow(curcoef[i]); } } } printf("\n"); for(i=N-1;i>=0;i--) { mpfr_set_prec(fra, i >= L ? 
PREC_TARGET+4 : PREC2); mpfr_set(fra, bestcoef[i], GMP_RNDN); char *s; printf("%s, \n", s = mpfrToStr(fra)); free(s); } printf("\nPhase 2 : max error = %g ULP at %g\n", best, bestworstx); exit(0); } sleef-3.3.1/src/gencoef/gencoef.txt000066400000000000000000000035721333715643700172030ustar00rootroot00000000000000 With this small tool, the coefficients for polynomial approximation used in kernels can be generated. Usage Edit the parameter header included by gencoef.c (dp.h by default; sp.h, ld.h and qp.h are the alternatives). In the beginning of the file, specifications of the parameters for generating coefficients are listed. Enable one of them by changing #if. Then, run make to compile the source code. Run gencoef, and it will show the generated coefficients in a few minutes. How it works There are two phases of the program. The first phase is the regression for minimizing the maximum relative error. This problem can be reduced to a linear programming problem, and the Simplex method is used in this implementation. This requires multi-precision calculation, and the implementation uses the MPFR library to do this. In this phase, only a small number of values (specified by the S macro, usually 40 or so) of the function to approximate are sampled within the argument range. The function to approximate is given by the TARGET function. Specifying higher values for S does not always give better results. The second phase is to optimize the coefficients so that it gives good accuracy with double precision calculation. In this phase, it checks 10000 points by default (specified by the Q macro) within the specified argument range to see if the polynomial gives a good error bound. In some cases, the last few terms have to be calculated in higher precision in order to achieve 1 ULP overall accuracy, and this implementation can take care of that. The L parameter specifies the number of high precision coefficients. In some cases, it is desirable to fix the last few coefficients to values like 1. This can be specified if you define the FIXCOEF0 macro.
This sometimes does not work, however. In this case, you need to specify the function to approximate as shown in the definition for cos. Finding a set of good parameters is not a straightforward process. You usually need many iterations of trial and error. sleef-3.3.1/src/gencoef/ld.h000066400000000000000000000076711333715643700156100ustar00rootroot00000000000000// This is part of SLEEF, written by Naoki // Shibata. http://shibatch.sourceforge.net // The code in this file is distributed under the Creative Commons // Attribution 4.0 International License. #define PREC_TARGET 64 #if 0 #define N 8 // Degree of equation #define S 40 // Number of samples for phase 1 #define L 4 // Number of high precision coefficients #define MIN 0.0 // Min argument #define MAX (M_PI/4) // Max argument #define PMUL 2 // The form of polynomial is y = x^(PADD+PMUL*0) + x^(PADD+PMUL*1) + ... #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_sin(ret, a, GMP_RNDN); } // The function to approximate void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 // Fix coef 0 to 1.0 #endif #if 0 #define N 10 #define S 40 #define L 2 #define MIN 0.0 #define MAX (M_PI/4) void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x; mpfr_init(x); mpfr_cos(ret, a, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clear(x); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #define FIXCOEF0 (-0.5) #endif #if 0 // for xsincospi4_u05 #define N 9 #define S 40 #define L 2 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 // for xsincospi4_u05 #define N 9 #define 
S 40 #define L 2 #define MIN 0.0 #define MAX 1.0 void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_cos(ret, x, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #endif #if 0 // for xsincospi4 #define N 7 #define S 40 #define L 0 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 #define N 17 #define S 40 #define L 0 #define MIN 0.0 #define MAX (M_PI/4) #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_tan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif #if 0 #define N 9 #define S 40 #define L 2 #define MIN 1 //0.75 #define MAX 1.5 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_log(ret, a, GMP_RNDN); } void CFUNC(mpfr_t frd, mpfr_t fra) { mpfr_t tmp, one; mpfr_inits(tmp, one, NULL); mpfr_set_d(one, 1, GMP_RNDN); mpfr_add(tmp, fra, one, GMP_RNDN); mpfr_sub(frd, fra, one, GMP_RNDN); mpfr_div(frd, frd, tmp, GMP_RNDN); mpfr_clear(tmp, one, NULL); } #define FIXCOEF0 2.0 #endif #if 0 #define N 12 #define S 50 #define L 0 #define MIN -0.347 #define MAX 0.347 // 0.5 log 2 #define PMUL 1 #define PADD 0 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_exp(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #define FIXCOEF1 1.0 #define FIXCOEF2 0.5 #endif #if 0 #define N 22 
#define S 100 #define L 2 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_atan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif sleef-3.3.1/src/gencoef/qp.h000066400000000000000000000071431333715643700156230ustar00rootroot00000000000000// This is part of SLEEF, written by Naoki // Shibata. http://shibatch.sourceforge.net // The code in this file is distributed under the Creative Commons // Attribution 4.0 International License. #define PREC_TARGET 113 // #if 0 #define N 15 // Degree of equation #define S 150 // Number of samples for phase 1 #define L 0 // Number of high precision coefficients #define P 0.37 #define MIN 0.0 // Min argument #define MAX (M_PI/2) // Max argument #define PMUL 2 // The form of polynomial is y = x^(PADD+PMUL*0) + x^(PADD+PMUL*1) + ... #define PADD 3 void TARGET(mpfr_t ret, mpfr_t a) { // The function to approximate mpfr_sin(ret, a, GMP_RNDN); mpfr_sub(ret, ret, a, GMP_RNDN); // ret = sin(a) - a } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif #if 0 #define N 15 #define S 150 #define L 0 #define MIN 0.0 #define MAX (M_PI/2) void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x; mpfr_init(x); mpfr_cos(ret, a, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clear(x); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 //#define FIXCOEF0 (-0.5) #endif #if 0 // for xsincospi4_u05 #define N 13 #define S 150 #define L 2 #define P 0.9 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_sin(ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #endif 
#if 0 // for xsincospi4_u05 #define N 13 #define S 150 #define L 2 #define MIN 0.0 #define MAX 1.0 void TARGET(mpfr_t ret, mpfr_t a) { // cos(x) - 1 mpfr_t x, y; mpfr_inits(x, y, NULL); mpfr_const_pi(x, GMP_RNDN); mpfr_set_d(y, 1.0/4, GMP_RNDN); mpfr_mul(x, x, y, GMP_RNDN); mpfr_mul(x, x, a, GMP_RNDN); mpfr_cos(ret, x, GMP_RNDN); mpfr_set_ld(x, 1, GMP_RNDN); mpfr_sub(ret, ret, x, GMP_RNDN); mpfr_clears(x, y, NULL); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define PMUL 2 #define PADD 2 #endif #if 0 // running #define N 31 #define S 100 #define P 1.7 #define L 0 #define MIN 0.0 #define MAX (M_PI/4) #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_tan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif #if 0 // running #define N 20 #define S 110 #define L 2 #define MIN 1 //0.75 #define MAX 1.5 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_log(ret, a, GMP_RNDN); } void CFUNC(mpfr_t frd, mpfr_t fra) { mpfr_t tmp, one; mpfr_inits(tmp, one, NULL); mpfr_set_d(one, 1, GMP_RNDN); mpfr_add(tmp, fra, one, GMP_RNDN); mpfr_sub(frd, fra, one, GMP_RNDN); mpfr_div(frd, frd, tmp, GMP_RNDN); mpfr_clears(tmp, one, NULL); } #define FIXCOEF0 2.0 #endif #if 1 #define N 22 #define S 140 #define L 2 #define MIN -0.347 #define MAX 0.347 // 0.5 log 2 #define PMUL 1 #define PADD 0 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_exp(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #define FIXCOEF1 1.0 //#define FIXCOEF2 0.5 #endif #if 0 // running #define N 45 #define S 100 #define P 1.55 #define L 2 #define MIN 0.0 #define MAX 1.0 #define PMUL 2 #define PADD 1 void TARGET(mpfr_t ret, mpfr_t a) { mpfr_atan(ret, a, GMP_RNDN); } void CFUNC(mpfr_t dst, mpfr_t src) { mpfr_set(dst, src, GMP_RNDN); } #define FIXCOEF0 1.0 #endif 
sleef-3.3.1/src/gencoef/simplexfr.c000066400000000000000000000227761333715643700172200ustar00rootroot00000000000000// The original code for simplex algorithm is taken from Haruhiko Okumura's book. // https://oku.edu.mie-u.ac.jp/~okumura/algo/ // The code is distributed under the Creative Commons Attribution 4.0 International License. // https://creativecommons.org/licenses/by/4.0/ // The code is modified by Naoki Shibata to process arbitrary precision numbers. #include #include #include #include #include #include #include #define PREC 4096 #define EPS 1e-50 #define OK 0 #define MAXIMIZABLE_TO_INFINITY 1 #define NOT_FEASIBLE 2 #define ERROR (-1) #define NOP (-1) #define EQU (0) #define LEQ 1 #define GEQ 2 static int m, n, n1, n2, n3, jmax; static int *col, *row, *nonzero_row, *inequality; static mpfr_t **a, *c, **q, *pivotcolumn; static mpfr_t zero, one, eps, minuseps, large; void mpfr_zinit(mpfr_t m) { mpfr_init(m); mpfr_set_d(m, 0, GMP_RNDN); } static void init(int n0, int m0) { int i, j; m = m0; n = n0; mpfr_init(zero); mpfr_set_d(zero, 0, GMP_RNDN); mpfr_init(one); mpfr_set_d(one, 1, GMP_RNDN); mpfr_init(eps); mpfr_set_d(eps, EPS, GMP_RNDN); mpfr_init(minuseps); mpfr_set_d(minuseps, -EPS, GMP_RNDN); mpfr_init(large); mpfr_set_d(large, 1.0 / EPS, GMP_RNDN); a = malloc(sizeof(mpfr_t *) * (m + 1)); for(i=0;i < m+1;i++) { a[i] = malloc(sizeof(mpfr_t) * (n + 1)); for(j=0;j < (n+1);j++) { mpfr_zinit(a[i][j]); } } q = malloc(sizeof(mpfr_t *) * (m + 1)); for(i=0;i < m+1;i++) { q[i] = malloc(sizeof(mpfr_t) * (m + 1)); for(j=0;j < m+1;j++) { mpfr_zinit(q[i][j]); } } c = malloc(sizeof(mpfr_t) * (n + 1)); for(j=0;j < (n+1);j++) { mpfr_zinit(c[j]); } pivotcolumn = malloc(sizeof(mpfr_t) * (m + 1)); for(j=0;j < (m+1);j++) { mpfr_zinit(pivotcolumn[j]); } col = calloc(m+1, sizeof(int)); row = calloc(n+2*m+1, sizeof(int)); nonzero_row = calloc(n+2*m+1, sizeof(int)); inequality = calloc(m+1, sizeof(int)); } static void dispose() { mpfr_clears(zero, one, eps, minuseps, large, 
(mpfr_ptr)0);

  // Remainder of dispose(): clear every MPFR value, then free its storage.
  // The loop bounds mirror the allocations: q is (m+1) x (m+1),
  // a is (m+1) x (n+1), c has n+1 entries and pivotcolumn has m+1.
  int i, j;
  for(i=0;i < m+1;i++) {
    for(j=0;j < m+1;j++) {
      mpfr_clear(q[i][j]);
    }
    free(q[i]);
  }
  free(q);
  for(i=0;i < m+1;i++) {
    for(j=0;j < n+1;j++) {
      mpfr_clear(a[i][j]);
    }
    free(a[i]);
  }
  free(a);
  for(j=0;j < n+1;j++) {
    mpfr_clear(c[j]);
  }
  free(c);
  for(j=0;j < m+1;j++) {
    mpfr_clear(pivotcolumn[j]);
  }
  free(pivotcolumn);
  // Plain int index arrays: nothing to clear, only to free.
  free(col);
  free(row);
  free(nonzero_row);
  free(inequality);
}

// Assigns column numbers beyond the n original columns to the constraint rows
// and sets up the initial basis. Afterwards:
//   columns n+1  .. n1 : one per GEQ row (presumably surplus variables)
//   columns n1+1 .. n2 : one per LEQ row (presumably slack variables)
//   columns n2+1 .. n3 : one per non-LEQ row, i.e. GEQ and EQU
//                        (presumably artificial variables)
// nonzero_row[j] records the single row in which extra column j appears.
// col[i] is the column currently associated with row i, and row[j] is the
// inverse mapping (0 when column j is not associated with any row).
static void prepare() {
  int i;
  n1 = n;
  for (i = 1; i <= m; i++)
    if (inequality[i] == GEQ) {
      n1++;
      nonzero_row[n1] = i;   // not entered into col[]/row[] here
    }
  n2 = n1;
  for (i = 1; i <= m; i++)
    if (inequality[i] == LEQ) {
      n2++;
      col[i] = n2;
      nonzero_row[n2] = row[n2] = i;
    }
  n3 = n2;
  for (i = 1; i <= m; i++)
    if (inequality[i] != LEQ) {
      n3++;
      col[i] = n3;
      nonzero_row[n3] = row[n3] = i;
    }
  // q starts out as the identity matrix.
  for (i = 0; i <= m; i++) {
    mpfr_set_d(q[i][i], 1, GMP_RNDN);
  }
}

// Stores in ret the entry (i, j) of the current tableau, computed on demand
// from q and the original coefficients a instead of being kept explicitly.
//   ret : output value
//   i   : row index, 0..m (row 0 is the objective row)
//   j   : column index, 0..n3
static void tableau(mpfr_t ret, int i, int j) {
  int k;
  // Rows whose col[] entry is negative (phase1 sets it to -1) read as zero.
  if (col[i] < 0) {
    mpfr_set_d(ret, 0, GMP_RNDN);
    return;
  }
  if (j <= n) {
    // Original column: ret = sum over k of q[i][k] * a[k][j].
    // The products are formed first and then added by a single mpfr_sum call.
    mpfr_t s;
    mpfr_zinit(s);
    mpfr_set_d(s, 0, GMP_RNDN);
    mpfr_t *tab = malloc(sizeof(mpfr_t) * (m + 1));
    mpfr_ptr *ptab = malloc(sizeof(mpfr_ptr) * (m + 1));
    for (k = 0; k <= m; k++) {
      mpfr_zinit(tab[k]);
      ptab[k] = (mpfr_ptr)&tab[k];   // mpfr_sum takes an array of pointers
      mpfr_mul(tab[k], q[i][k], a[k][j], GMP_RNDN);
    }
    mpfr_sum(s, ptab, m+1, GMP_RNDN);
    for (k = 0; k <= m; k++) {
      mpfr_clear(tab[k]);
    }
    free(ptab);
    free(tab);
    mpfr_set(ret, s, GMP_RNDN);
    mpfr_clear(s);
    return;
  }
  // Extra column: it appears only in row nonzero_row[j], so the entry is
  // the matching element of q ...
  mpfr_set(ret, q[i][nonzero_row[j]], GMP_RNDN);
  // ... negated for columns n+1..n1 ...
  if (j <= n1) {
    mpfr_neg(ret, ret, GMP_RNDN);
    return;
  }
  // ... unchanged for columns n1+1..n2 and for every row except row 0 ...
  if (j <= n2 || i != 0) return;
  // ... and increased by one in row 0 for columns n2+1..n3.
  mpfr_add(ret, ret, one, GMP_RNDN);
  return;
}

// Pivots on row ipivot / column jpivot, updating q in place.
// pivotcolumn[] must already hold the entries of column jpivot for all rows.
// Only columns 1..m of q are modified; column 0 is left as it is.
static void pivot(int ipivot, int jpivot) {
  int i, j;
  mpfr_t u;
  mpfr_zinit(u);
  // Scale the pivot row by 1 / (pivot element).
  mpfr_set(u, pivotcolumn[ipivot], GMP_RNDN);
  for (j = 1; j <= m; j++) {
    mpfr_div(q[ipivot][j], q[ipivot][j], u, GMP_RNDN);
  }
  // Eliminate column jpivot from every other row:
  // q[i][j] = q[i][j] - q[ipivot][j] * u, formed as -(q[ipivot][j]*u - q[i][j]).
  for (i = 0; i <= m; i++)
    if (i != ipivot) {
      mpfr_set(u, pivotcolumn[i], GMP_RNDN);
      for (j = 1; j <= m; j++) {
        mpfr_fms(q[i][j], q[ipivot][j], u, q[i][j], GMP_RNDN);
        mpfr_neg(q[i][j], q[i][j], GMP_RNDN);
      }
    }
  // The column previously associated with row ipivot is released.
  row[col[ipivot]] = 0;
  col[ipivot] =
jpivot; row[jpivot] = ipivot; mpfr_clear(u); } static int minimize() { int i, ipivot, jpivot; mpfr_t t, u; mpfr_inits(t, u, (mpfr_ptr)0); for (;;) { for (jpivot = 1; jpivot <= jmax; jpivot++) { if (row[jpivot] == 0) { tableau(pivotcolumn[0], 0, jpivot); if (mpfr_cmp(pivotcolumn[0], minuseps) < 0) break; } } if (jpivot > jmax) { mpfr_clears(t, u, (mpfr_ptr)0); return 1; } mpfr_set(u, large, GMP_RNDN); ipivot = 0; for (i = 1; i <= m; i++) { tableau(pivotcolumn[i], i, jpivot); if (mpfr_cmp(pivotcolumn[i], eps) > 0) { tableau(t, i, 0); mpfr_div(t, t, pivotcolumn[i], GMP_RNDN); if (mpfr_cmp(t, u) < 0) { ipivot = i; mpfr_set(u, t, GMP_RNDN); } } } if (ipivot == 0) { mpfr_clears(t, u, (mpfr_ptr)0); return 0; // the objective function can be minimized to -infinite } pivot(ipivot, jpivot); } } static int phase1() { int i, j; mpfr_t u; mpfr_zinit(u); jmax = n3; for (i = 0; i <= m; i++) { if (col[i] > n2) mpfr_set_d(q[0][i], -1, GMP_RNDN); } minimize(); tableau(u, 0, 0); if (mpfr_cmp(u, minuseps) < 0) { mpfr_clear(u); return 0; } for (i = 1; i <= m; i++) { if (col[i] > n2) { col[i] = -1; } } mpfr_set_d(q[0][0], 1, GMP_RNDN); for (j = 1; j <= m; j++) mpfr_set_d(q[0][j], 0, GMP_RNDN); for (i = 1; i <= m; i++) { if ((j = col[i]) > 0 && j <= n && mpfr_cmp_d(c[j], 0) != 0) { mpfr_set(u, c[j], GMP_RNDN); for (j = 1; j <= m; j++) { mpfr_fms(q[0][j], q[i][j], u, q[0][j], GMP_RNDN); mpfr_neg(q[0][j], q[0][j], GMP_RNDN); } } } mpfr_clear(u); return 1; } static int phase2() { int j; jmax = n2; for (j = 0; j <= n; j++) { mpfr_set(a[0][j], c[j], GMP_RNDN); } return minimize(); } int solve_fr(mpfr_t *result, int n0, int m0, mpfr_t **a0, int *ineq0, mpfr_t *c0) { int i,j; m = m0; // number of inequations n = n0+1; // number of variables init(n, m); mpfr_t csum; mpfr_zinit(csum); for(j=0;j /dev/null) ARCH := $(shell uname -p) all : ifndef BUILDDIR @echo @echo Please set the build directory to BUILDDIR environment variable and run make once again. @echo e.g. 
export BUILDDIR='`pwd`'/../../build @echo else @echo @echo You can start measurement by "'"make measure"'". ifdef ICCAVAILABLE @echo You can start measurement with SVML by "'"make measureSVML"'". endif @echo Then, you can plot the results of measurement by "'"make plot"'". @echo @echo You have to install java and gnuplot to do plotting. @echo Stop all tasks on the computer before starting measurement. @echo endif benchsvml128_10.o : benchsvml128.c bench.h -command -v icc >/dev/null 2>&1 && icc benchsvml128.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml128_10.o benchsvml128_40.o : benchsvml128.c bench.h -command -v icc >/dev/null 2>&1 && icc benchsvml128.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml128_40.o benchsvml256_10.o : benchsvml256.c bench.h -command -v icc >/dev/null 2>&1 && icc benchsvml256.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml256_10.o benchsvml256_40.o : benchsvml256.c bench.h -command -v icc >/dev/null 2>&1 && icc benchsvml256.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -march=core-avx2 -O0 -lm -c -o benchsvml256_40.o benchsvml512_10.o : benchsvml512.c bench.h -command -v icc >/dev/null 2>&1 && icc benchsvml512.c -Wall -I.. -DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -xCOMMON-AVX512 -O0 -lm -c -o benchsvml512_10.o benchsvml512_40.o : benchsvml512.c bench.h -command -v icc >/dev/null 2>&1 && icc benchsvml512.c -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -xCOMMON-AVX512 -O0 -lm -c -o benchsvml512_40.o benchsvml_10 : benchsvml.c benchsvml128_10.o benchsvml256_10.o benchsvml512_10.o bench.h -command -v icc >/dev/null 2>&1 && icc benchsvml.c benchsvml128_10.o benchsvml256_10.o benchsvml512_10.o -Wall -I.. 
-DSVMLULP=1 -fimf-max-error=1.0 -fimf-domain-exclusion=0 -O0 -march=native -lm -o benchsvml_10 benchsvml_40 : benchsvml.c benchsvml128_40.o benchsvml256_40.o benchsvml512_40.o bench.h -command -v icc >/dev/null 2>&1 && icc benchsvml.c benchsvml128_40.o benchsvml256_40.o benchsvml512_40.o -Wall -I.. -DSVMLULP=4 -fimf-max-error=4.0 -fimf-domain-exclusion=0 -O0 -march=native -lm -o benchsvml_40 # ifeq ($(ARCH),aarch64) benchsleef : benchsleef.c benchsleef128.o bench.h $(CC) benchsleef.c benchsleef128.o -Wall -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o benchsleef benchsleef128.o : benchsleef128.c bench.h $(CC) benchsleef128.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c else benchsleef : benchsleef.c benchsleef128.o benchsleef256.o benchsleef512.o bench.h $(CC) benchsleef.c benchsleef128.o benchsleef256.o benchsleef512.o -Wall -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -lsleef -lm -o benchsleef benchsleef128.o : benchsleef128.c bench.h $(CC) benchsleef128.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c benchsleef256.o : benchsleef256.c bench.h $(CC) benchsleef256.c -Wall -march=native -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c benchsleef512.o : benchsleef512.c bench.h $(CC) benchsleef512.c -Wall -mavx512f -O0 -g -I$(BUILDDIR)/include -L$(BUILDDIR)/lib -Wno-attributes -c endif # ProcessData.class : ProcessData.java javac ProcessData.java # ifndef BUILDDIR measure : @echo @echo Please set the build directory to BUILDDIR environment variable and run make once again. @echo e.g. export BUILDDIR='`pwd`'/../../build @echo else measure : benchsleef chmod +x ./measure.sh LD_LIBRARY_PATH=$(BUILDDIR)/lib ./measure.sh ./benchsleef @echo @echo Now, you can plot the results of measurement by "'"make plot"'". @echo You can do another measurement by "'"make measure"'". 
ifdef ICCAVAILABLE @echo You can start another measurement with SVML by "'"make measureSVML"'". endif @echo You can start over by "'"make restart"'". @echo endif measureSVML : all benchsvml_10 benchsvml_40 chmod +x ./measure.sh ./measure.sh ./benchsvml_10 ./benchsvml_40 @echo @echo Now, you can plot the results of measurement by "'"make plot"'". @echo You can do another measurement by "'"make measure"'". ifdef ICCAVAILABLE @echo You can start another measurement with SVML by "'"make measureSVML"'". endif @echo You can start over by "'"make restart"'". @echo plot : ProcessData.class counter.txt java ProcessData *dptrig*.out gnuplot script.out mv output.png trigdp.png java ProcessData *dpnontrig*.out gnuplot script.out mv output.png nontrigdp.png java ProcessData *sptrig*.out gnuplot script.out mv output.png trigsp.png java ProcessData *spnontrig*.out gnuplot script.out mv output.png nontrigsp.png @echo @echo Plotted results are in trigdp.png, nontrigdp.png, trigsp.png and nontrigsp.png. @echo clean : rm -f *~ a.out *.so *.so.* *.a *.s *.o rm -rf *.dSYM *.dylib rm -f *.obj *.lib *.dll *.exp *.exe *.stackdump rm -f *.class *.png benchsleef benchsvml_10 benchsvml_40 *.out counter.txt restart : rm -f *.out counter.txt sleef-3.3.1/src/libm-benchmarks/ProcessData.java000066400000000000000000000121601333715643700215300ustar00rootroot00000000000000import java.util.*; import java.io.*; public class ProcessData { static final int DP = 64, SP = 32; static LinkedHashMap funcNameOrder = new LinkedHashMap(); static class Key { final String funcName; final int prec, bits; final ArrayList range = new ArrayList(); final double ulps; Key(String s) { String[] a = s.split(","); funcName = a[0].trim(); if (funcNameOrder.get(funcName) == null) { funcNameOrder.put(funcName, funcNameOrder.size()); } prec = a[1].trim().equals("DP") ? DP : a[1].trim().equals("SP") ? 
SP : 0; bits = Integer.parseInt(a[2].trim()); int c; for(c = 3;;c++) { if (a[c].trim().endsWith("ulps")) break; range.add(Double.parseDouble(a[c])); } ulps = Double.parseDouble(a[c].trim().replace("ulps", "")); } public int hashCode() { int h = funcName.hashCode(); h ^= prec ^ bits; return h; } public boolean equals(Object o) { if (this == o) return true; Key k = (Key) o; if (funcName.compareTo(k.funcName) != 0) return false; if (prec != k.prec) return false; if (bits != k.bits) return false; if (range.size() != k.range.size()) return false; for(int i=0;i { public int compare(Key d0, Key d1) { if (d0 == d1) return 0; if (d0.prec < d1.prec) return 1; if (d0.prec > d1.prec) return -1; if (d0.ulps > d1.ulps) return 1; if (d0.ulps < d1.ulps) return -1; int fc = (int)funcNameOrder.get(d0.funcName) - (int)funcNameOrder.get(d1.funcName); if (fc != 0) return fc; if (d0.bits > d1.bits) return 1; if (d0.bits < d1.bits) return -1; if (d0.range.size() > d1.range.size()) return 1; if (d0.range.size() < d1.range.size()) return -1; for(int i=0;i d1.range.get(i)) return 1; if (d0.range.get(i) < d1.range.get(i)) return -1; } return 0; } } public static void main(String[] args) throws Exception { LinkedHashMap> allData = new LinkedHashMap>(); TreeSet allKeys = new TreeSet(new KeyComparator()); LinkedHashSet allColumnTitles = new LinkedHashSet(); double maximum = 0; for(int i=0;i v = allData.get(key); if (v == null) { v = new LinkedHashMap(); allData.put(key, v); } String[] a = s.split(","); double time = Double.parseDouble(a[a.length-1]); v.put(columnTitle, time); maximum = Math.max(maximum, time); } lnr.close(); } PrintStream ps = new PrintStream("data.out"); for(Key k : allKeys) { ps.print("\"" + k + "\" "); LinkedHashMap v = allData.get(k); for(String s : allColumnTitles) { Double d = v.get(s); if (d != null) ps.print(d); if (d == null) ps.print("0"); ps.print("\t"); } ps.println(); } ps.close(); ps = new PrintStream("script.out"); ps.println("set terminal pngcairo size 1280, 800 
font \",10\""); ps.println("set output \"output.png\""); ps.println("color00 = \"#FF5050\";"); // red ps.println("color01 = \"#0066FF\";"); // blue ps.println("color02 = \"#00FF00\";"); // green ps.println("color03 = \"#FF9900\";"); // orange ps.println("color04 = \"#CC00CC\";"); // purple ps.println("color05 = \"#880000\";"); // brown ps.println("color06 = \"#003300\";"); // dark green ps.println("color07 = \"#000066\";"); // dark blue ps.println("set style data histogram"); ps.println("set style histogram cluster gap 1"); ps.println("set style fill solid 1.00"); ps.println("set boxwidth 0.9"); ps.println("set xtics format \"\""); ps.println("set xtics rotate by -90"); ps.println("set grid ytics"); ps.println("set ylabel \"Execution time in micro sec.\""); ps.println("set yrange [0:*]"); ps.println("set bmargin 24"); ps.println("set title \"Single execution time in micro sec.\""); ps.print("plot"); int i = 0; for(String s : allColumnTitles) { ps.print("\"data.out\" using " + (i+2) + ":xtic(1) title \"" + s + "\" linecolor rgb color" + String.format("%02d", i)); if (i != allColumnTitles.size()-1) ps.print(", "); i++; } ps.println(); ps.close(); } } sleef-3.3.1/src/libm-benchmarks/bench.h000066400000000000000000000050001333715643700177000ustar00rootroot00000000000000#define NITER1 100000 #define NITER2 10000 #define NITER (NITER1 * NITER2) #define callFuncSLEEF1_1(funcName, name, xmin, xmax, ulp, arg, type) ({ \ printf("%s\n", #funcName); \ uint64_t t = Sleef_currentTimeMicros(); \ for(int j=0;j #include #include #include #include #include #include #include "bench.h" int veclen = 16; double *abufdp, *bbufdp; float *abufsp, *bbufsp; FILE *fp; #if defined(__i386__) || defined(__x86_64__) void x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { uint32_t a, b, c, d; __asm__ __volatile__ ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (eax), "c"(ecx)); out[0] = a; out[1] = b; out[2] = c; out[3] = d; } int cpuSupportsAVX() { int32_t reg[4]; x86CpuID(reg, 1, 0); 
return (reg[2] & (1 << 28)) != 0; } int cpuSupportsAVX512F() { int32_t reg[4]; x86CpuID(reg, 7, 0); return (reg[1] & (1 << 16)) != 0; } #endif void fillDP(double *buf, double min, double max) { for(int i=0;i= 3) fnBase = argv[2]; srandom(time(NULL)); #if defined(__i386__) || defined(__x86_64__) int do128bit = 1; int do256bit = cpuSupportsAVX(); int do512bit = cpuSupportsAVX512F(); #elif defined(__ARM_NEON) int do128bit = 1; #else #error Unsupported architecture #endif posix_memalign((void **)&abufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); posix_memalign((void **)&bbufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); abufsp = (float *)abufdp; bbufsp = (float *)bbufdp; sprintf(fn, "%sdptrig.out", fnBase); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do128bit) benchSleef128_DPTrig(); #if defined(__i386__) || defined(__x86_64__) if (do256bit) benchSleef256_DPTrig(); if (do512bit) benchSleef512_DPTrig(); #endif fclose(fp); sprintf(fn, "%sdpnontrig.out", fnBase); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do128bit) benchSleef128_DPNontrig(); #if defined(__i386__) || defined(__x86_64__) if (do256bit) benchSleef256_DPNontrig(); if (do512bit) benchSleef512_DPNontrig(); #endif fclose(fp); sprintf(fn, "%ssptrig.out", fnBase); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do128bit) benchSleef128_SPTrig(); #if defined(__i386__) || defined(__x86_64__) if (do256bit) benchSleef256_SPTrig(); if (do512bit) benchSleef512_SPTrig(); #endif fclose(fp); sprintf(fn, "%sspnontrig.out", fnBase); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do128bit) benchSleef128_SPNontrig(); #if defined(__i386__) || defined(__x86_64__) if (do256bit) benchSleef256_SPNontrig(); if (do512bit) benchSleef512_SPNontrig(); #endif fclose(fp); exit(0); } sleef-3.3.1/src/libm-benchmarks/benchsleef128.c000066400000000000000000000203421333715643700211530ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __SSE2__ #if defined(_MSC_VER) #include #else #include #endif typedef __m128d vdouble; typedef __m128 vfloat; #define ENABLED #elif defined(__ARM_NEON) #include typedef float64x2_t vdouble; typedef float32x4_t vfloat; #define ENABLED #endif #ifdef ENABLED void benchSleef128_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 6.28, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 1e+6, 4.0, abufdp, 
vdouble); callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 1e+6, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSLEEF1_1(Sleef_sind2_u10 , "sin, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u10 , "cos, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u10 , "tan, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u10, "sincos, DP, 128", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind2_u35 , "sin, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd2_u35 , "cos, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand2_u35 , "tan, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd2_u35, "sincos, DP, 128", 0, 1e+100, 4.0, abufdp, vdouble); } void benchSleef128_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSLEEF1_1(Sleef_logd2_u10 , "log, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log10d2_u10, "log10, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log1pd2_u10, "log1p, DP, 128", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_logd2_u35 , "log, DP, 128", 0, 1e+300, 4.0, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSLEEF1_1(Sleef_expd2_u10 , "exp, DP, 128", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp2d2_u10 , "exp2, DP, 128", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp10d2_u10, "exp10, DP, 128", -700, 700, 1.0, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSLEEF1_2(Sleef_powd2_u10, "pow, DP, 128", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble); fillDP(abufdp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asind2_u10, "asin, DP, 128", -1.0, 1.0, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd2_u10, "acos, DP, 128", -1.0, 1.0, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_asind2_u35, "asin, DP, 128", -1.0, 
1.0, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd2_u35, "acos, DP, 128", -1.0, 1.0, 4.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSLEEF1_1(Sleef_atand2_u10, "atan, DP, 128", -10, 10, 1.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d2_u10, "atan2, DP, 128", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble); callFuncSLEEF1_1(Sleef_atand2_u35, "atan, DP, 128", -10, 10, 4.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d2_u35, "atan2, DP, 128", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble); } void benchSleef128_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSLEEF1_1(Sleef_sinf4_u10 , "sin, SP, 128", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf4_u10 , "cos, SP, 128", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf4_u10 , "tan, SP, 128", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf4_u10, "sincos, SP, 128", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf4_u35 , "sin, SP, 128", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf4_u35 , "cos, SP, 128", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf4_u35 , "tan, SP, 128", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf4_u35, "sincos, SP, 128", 0, 6.28, 4.0, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSLEEF1_1(Sleef_sinf4_u10 , "sin, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf4_u10 , "cos, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf4_u10 , "tan, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf4_u10, "sincos, SP, 128", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf4_u35 , "sin, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf4_u35 , "cos, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf4_u35 , "tan, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf4_u35, "sincos, SP, 128", 0, 1e+20, 4.0, abufsp, vfloat); } void benchSleef128_SPNontrig() { 
fillSP(abufsp, 0, 1e+38); callFuncSLEEF1_1(Sleef_logf4_u10 , "log, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_log10f4_u10, "log10, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf4_u10, "log1p, SP, 128", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_logf4_u35 , "log, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log10f4_u35, "log10, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf4_u35, "log1p, SP, 128", 0, 1e+38, 4.0, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSLEEF1_1(Sleef_expf4_u10 , "exp, SP, 128", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp2f4_u10 , "exp2, SP, 128", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp10f4_u10, "exp10, SP, 128", -100, 100, 1.0, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSLEEF1_2(Sleef_powf4_u10, "pow, SP, 128", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat); fillSP(abufsp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asinf4_u10, "asin, SP, 128", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf4_u10, "acos, SP, 128", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_asinf4_u35, "asin, SP, 128", -1.0, 1.0, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf4_u35, "acos, SP, 128", -1.0, 1.0, 4.0, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSLEEF1_1(Sleef_atanf4_u10, "atan, SP, 128", -10, 10, 1.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f4_u10, "atan2, SP, 128", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat); callFuncSLEEF1_1(Sleef_atanf4_u35, "atan, SP, 128", -10, 10, 4.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f4_u35, "atan2, SP, 128", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void benchSleef128_DPTrig() {} void benchSleef128_DPNontrig() {} void benchSleef128_SPTrig() {} void benchSleef128_SPNontrig() {} #endif // #ifdef ENABLED 
sleef-3.3.1/src/libm-benchmarks/benchsleef256.c000066400000000000000000000201771333715643700211630ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __AVX__ #if defined(_MSC_VER) #include #else #include #endif typedef __m256d vdouble; typedef __m256 vfloat; #define ENABLED #endif #ifdef ENABLED void benchSleef256_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 6.28, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble); 
callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 1e+6, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSLEEF1_1(Sleef_sind4_u10 , "sin, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u10 , "cos, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u10 , "tan, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u10, "sincos, DP, 256", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind4_u35 , "sin, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd4_u35 , "cos, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand4_u35 , "tan, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd4_u35, "sincos, DP, 256", 0, 1e+100, 4.0, abufdp, vdouble); } void benchSleef256_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSLEEF1_1(Sleef_logd4_u10 , "log, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log10d4_u10, "log10, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log1pd4_u10, "log1p, DP, 256", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_logd4_u35 , "log, DP, 256", 0, 1e+300, 4.0, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSLEEF1_1(Sleef_expd4_u10 , "exp, DP, 256", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp2d4_u10 , "exp2, DP, 256", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp10d4_u10, "exp10, DP, 256", -700, 700, 1.0, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSLEEF1_2(Sleef_powd4_u10, "pow, DP, 256", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble); fillDP(abufdp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asind4_u10, "asin, DP, 256", -1.0, 1.0, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd4_u10, "acos, DP, 256", -1.0, 1.0, 1.0, 
abufdp, vdouble); callFuncSLEEF1_1(Sleef_asind4_u35, "asin, DP, 256", -1.0, 1.0, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd4_u35, "acos, DP, 256", -1.0, 1.0, 4.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSLEEF1_1(Sleef_atand4_u10, "atan, DP, 256", -10, 10, 1.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d4_u10, "atan2, DP, 256", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble); callFuncSLEEF1_1(Sleef_atand4_u35, "atan, DP, 256", -10, 10, 4.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d4_u35, "atan2, DP, 256", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble); } void benchSleef256_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSLEEF1_1(Sleef_sinf8_u10 , "sin, SP, 256", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf8_u10 , "cos, SP, 256", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf8_u10 , "tan, SP, 256", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf8_u10, "sincos, SP, 256", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf8_u35 , "sin, SP, 256", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf8_u35 , "cos, SP, 256", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf8_u35 , "tan, SP, 256", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf8_u35, "sincos, SP, 256", 0, 6.28, 4.0, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSLEEF1_1(Sleef_sinf8_u10 , "sin, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf8_u10 , "cos, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf8_u10 , "tan, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf8_u10, "sincos, SP, 256", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf8_u35 , "sin, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf8_u35 , "cos, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf8_u35 , "tan, SP, 256", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf8_u35, "sincos, SP, 
256", 0, 1e+20, 4.0, abufsp, vfloat); } void benchSleef256_SPNontrig() { fillSP(abufsp, 0, 1e+38); callFuncSLEEF1_1(Sleef_logf8_u10 , "log, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_log10f8_u10, "log10, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf8_u10, "log1p, SP, 256", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_logf8_u35 , "log, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log10f8_u35, "log10, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf8_u35, "log1p, SP, 256", 0, 1e+38, 4.0, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSLEEF1_1(Sleef_expf8_u10 , "exp, SP, 256", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp2f8_u10 , "exp2, SP, 256", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp10f8_u10, "exp10, SP, 256", -100, 100, 1.0, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSLEEF1_2(Sleef_powf8_u10, "pow, SP, 256", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat); fillSP(abufsp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asinf8_u10, "asin, SP, 256", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf8_u10, "acos, SP, 256", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_asinf8_u35, "asin, SP, 256", -1.0, 1.0, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf8_u35, "acos, SP, 256", -1.0, 1.0, 4.0, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSLEEF1_1(Sleef_atanf8_u10, "atan, SP, 256", -10, 10, 1.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f8_u10, "atan2, SP, 256", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat); callFuncSLEEF1_1(Sleef_atanf8_u35, "atan, SP, 256", -10, 10, 4.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f8_u35, "atan2, SP, 256", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void zeroupper256() {} void benchSleef256_DPTrig() {} void benchSleef256_DPNontrig() {} void benchSleef256_SPTrig() {} void 
benchSleef256_SPNontrig() {} #endif // #ifdef ENABLED sleef-3.3.1/src/libm-benchmarks/benchsleef512.c000066400000000000000000000202161333715643700211500ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __AVX512F__ #if defined(_MSC_VER) #include #else #include #endif typedef __m512d vdouble; typedef __m512 vfloat; #define ENABLED #endif #ifdef ENABLED void benchSleef512_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 6.28, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 6.28, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 6.28, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 1e+6, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, 
DP, 512", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 1e+6, 4.0, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSLEEF1_1(Sleef_sind8_u10 , "sin, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u10 , "cos, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u10 , "tan, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u10, "sincos, DP, 512", 0, 1e+100, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sind8_u35 , "sin, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_cosd8_u35 , "cos, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_tand8_u35 , "tan, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_sincosd8_u35, "sincos, DP, 512", 0, 1e+100, 4.0, abufdp, vdouble); } void benchSleef512_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSLEEF1_1(Sleef_logd8_u10 , "log, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log10d8_u10, "log10, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_log1pd8_u10, "log1p, DP, 512", 0, 1e+300, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_logd8_u35 , "log, DP, 512", 0, 1e+300, 4.0, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSLEEF1_1(Sleef_expd8_u10 , "exp, DP, 512", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp2d8_u10 , "exp2, DP, 512", -700, 700, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_exp10d8_u10, "exp10, DP, 512", -700, 700, 1.0, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSLEEF1_2(Sleef_powd8_u10, "pow, DP, 512", -30, 30, -30, 30, 1.0, abufdp, bbufdp, vdouble); fillDP(abufdp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asind8_u10, "asin, DP, 512", -1.0, 1.0, 1.0, abufdp, vdouble); 
callFuncSLEEF1_1(Sleef_acosd8_u10, "acos, DP, 512", -1.0, 1.0, 1.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_asind8_u35, "asin, DP, 512", -1.0, 1.0, 4.0, abufdp, vdouble); callFuncSLEEF1_1(Sleef_acosd8_u35, "acos, DP, 512", -1.0, 1.0, 4.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSLEEF1_1(Sleef_atand8_u10, "atan, DP, 512", -10, 10, 1.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d8_u10, "atan2, DP, 512", -10, 10, -10, 10, 1.0, abufdp, bbufdp, vdouble); callFuncSLEEF1_1(Sleef_atand8_u35, "atan, DP, 512", -10, 10, 4.0, abufdp, vdouble); callFuncSLEEF1_2(Sleef_atan2d8_u35, "atan2, DP, 512", -10, 10, -10, 10, 4.0, abufdp, bbufdp, vdouble); } void benchSleef512_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSLEEF1_1(Sleef_sinf16_u10 , "sin, SP, 512", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf16_u10 , "cos, SP, 512", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf16_u10 , "tan, SP, 512", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf16_u10, "sincos, SP, 512", 0, 6.28, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf16_u35 , "sin, SP, 512", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf16_u35 , "cos, SP, 512", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf16_u35 , "tan, SP, 512", 0, 6.28, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf16_u35, "sincos, SP, 512", 0, 6.28, 4.0, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSLEEF1_1(Sleef_sinf16_u10 , "sin, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf16_u10 , "cos, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf16_u10 , "tan, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf16_u10, "sincos, SP, 512", 0, 1e+20, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sinf16_u35 , "sin, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_cosf16_u35 , "cos, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_tanf16_u35 , "tan, SP, 512", 
0, 1e+20, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_sincosf16_u35, "sincos, SP, 512", 0, 1e+20, 4.0, abufsp, vfloat); } void benchSleef512_SPNontrig() { fillSP(abufsp, 0, 1e+38); callFuncSLEEF1_1(Sleef_logf16_u10 , "log, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_log10f16_u10, "log10, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf16_u10, "log1p, SP, 512", 0, 1e+38, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_logf16_u35 , "log, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log10f16_u35, "log10, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat); //callFuncSLEEF1_1(Sleef_log1pf16_u35, "log1p, SP, 512", 0, 1e+38, 4.0, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSLEEF1_1(Sleef_expf16_u10 , "exp, SP, 512", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp2f16_u10 , "exp2, SP, 512", -100, 100, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_exp10f16_u10, "exp10, SP, 512", -100, 100, 1.0, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSLEEF1_2(Sleef_powf16_u10, "pow, SP, 512", -30, 30, -30, 30, 1.0, abufsp, bbufsp, vfloat); fillSP(abufsp, -1.0, 1.0); callFuncSLEEF1_1(Sleef_asinf16_u10, "asin, SP, 512", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf16_u10, "acos, SP, 512", -1.0, 1, 1.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_asinf16_u35, "asin, SP, 512", -1.0, 1.0, 4.0, abufsp, vfloat); callFuncSLEEF1_1(Sleef_acosf16_u35, "acos, SP, 512", -1.0, 1.0, 4.0, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSLEEF1_1(Sleef_atanf16_u10, "atan, SP, 512", -10, 10, 1.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f16_u10, "atan2, SP, 512", -10, 10, -10, 10, 1.0, abufsp, bbufsp, vfloat); callFuncSLEEF1_1(Sleef_atanf16_u35, "atan, SP, 512", -10, 10, 4.0, abufsp, vfloat); callFuncSLEEF1_2(Sleef_atan2f16_u35, "atan2, SP, 512", -10, 10, -10, 10, 4.0, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void benchSleef512_DPTrig() {} void 
benchSleef512_DPNontrig() {} void benchSleef512_SPTrig() {} void benchSleef512_SPNontrig() {} #endif // #ifdef ENABLED sleef-3.3.1/src/libm-benchmarks/benchsvml.c000066400000000000000000000075461333715643700206160ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #include "bench.h" int veclen = 16; int enableLogExp; double *abufdp, *bbufdp; float *abufsp, *bbufsp; FILE *fp; #if defined(__i386__) || defined(__x86_64__) void x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx) { uint32_t a, b, c, d; __asm__ __volatile__ ("cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (eax), "c"(ecx)); out[0] = a; out[1] = b; out[2] = c; out[3] = d; } int cpuSupportsAVX() { int32_t reg[4]; x86CpuID(reg, 1, 0); return (reg[2] & (1 << 28)) != 0; } int cpuSupportsAVX512F() { int32_t reg[4]; x86CpuID(reg, 7, 0); return (reg[1] & (1 << 16)) != 0; } #endif uint64_t Sleef_currentTimeMicros() { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (uint64_t)tp.tv_sec * 1000000LL + ((uint64_t)tp.tv_nsec/1000); } void fillDP(double *buf, double min, double max) { for(int i=0;i= 3) fnBase = argv[2]; srandom(time(NULL)); #if defined(__i386__) || defined(__x86_64__) int do128bit = 1; int do256bit = cpuSupportsAVX(); int do512bit = cpuSupportsAVX512F(); #elif defined(__ARM_NEON) int do128bit = 1; int do256bit = 0; int do512bit = 0; #else #error Unsupported architecture #endif posix_memalign((void **)&abufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); posix_memalign((void **)&bbufdp, veclen*sizeof(double), NITER1*veclen*sizeof(double)); abufsp = (float *)abufdp; bbufsp = (float *)bbufdp; enableLogExp = SVMLULP < 2; sprintf(fn, "%sdptrig%gulp.out", fnBase, (double)SVMLULP); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if 
(do256bit) zeroupper256(); if (do128bit) benchSVML128_DPTrig(); if (do256bit) benchSVML256_DPTrig(); if (do512bit) benchSVML512_DPTrig(); fclose(fp); sprintf(fn, "%sdpnontrig%gulp.out", fnBase, (double)SVMLULP); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do256bit) zeroupper256(); if (do128bit) benchSVML128_DPNontrig(); if (do256bit) benchSVML256_DPNontrig(); if (do512bit) benchSVML512_DPNontrig(); fclose(fp); sprintf(fn, "%ssptrig%gulp.out", fnBase, (double)SVMLULP); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do256bit) zeroupper256(); if (do128bit) benchSVML128_SPTrig(); if (do256bit) benchSVML256_SPTrig(); if (do512bit) benchSVML512_SPTrig(); fclose(fp); sprintf(fn, "%sspnontrig%gulp.out", fnBase, (double)SVMLULP); fp = fopen(fn, "w"); fprintf(fp, "%s\n", columnTitle); if (do256bit) zeroupper256(); if (do128bit) benchSVML128_SPNontrig(); if (do256bit) benchSVML256_SPNontrig(); if (do512bit) benchSVML512_SPNontrig(); fclose(fp); exit(0); } sleef-3.3.1/src/libm-benchmarks/benchsvml128.c000066400000000000000000000117211333715643700210370ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include uint64_t Sleef_currentTimeMicros(); void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern int enableLogExp; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __SSE2__ typedef __m128d vdouble; typedef __m128 vfloat; #define ENABLED #endif #ifdef ENABLED void benchSVML128_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 6.28, abufdp, vdouble); callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 6.28, abufdp, vdouble); callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 6.28, abufdp, vdouble); callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 6.28, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 1e+6, abufdp, vdouble); callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 1e+6, abufdp, vdouble); callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 1e+6, abufdp, vdouble); callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 1e+6, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSVML1_1(_mm_sin_pd , "sin, DP, 128", 0, 1e+100, abufdp, vdouble); callFuncSVML1_1(_mm_cos_pd , "cos, DP, 128", 0, 1e+100, abufdp, vdouble); callFuncSVML1_1(_mm_tan_pd , "tan, DP, 128", 0, 1e+100, abufdp, vdouble); callFuncSVML2_1(_mm_sincos_pd, "sincos, DP, 128", 0, 1e+100, abufdp, vdouble); } void benchSVML128_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSVML1_1(_mm_log_pd , "log, DP, 128", 0, 1e+300, abufdp, vdouble); if (enableLogExp) { callFuncSVML1_1(_mm_log10_pd, "log10, DP, 128", 0, 1e+300, abufdp, vdouble); callFuncSVML1_1(_mm_log1p_pd, "log1p, DP, 128", 0, 1e+300, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSVML1_1(_mm_exp_pd , "exp, DP, 128", -700, 700, abufdp, vdouble); 
callFuncSVML1_1(_mm_exp2_pd , "exp2, DP, 128", -700, 700, abufdp, vdouble); callFuncSVML1_1(_mm_exp10_pd, "exp10, DP, 128", -700, 700, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSVML1_2(_mm_pow_pd, "pow, DP, 128", -30, 30, -30, 30, abufdp, bbufdp, vdouble); } fillDP(abufdp, -1.0, 1.0); callFuncSVML1_1(_mm_asin_pd, "asin, DP, 128", -1.0, 1.0, abufdp, vdouble); callFuncSVML1_1(_mm_acos_pd, "acos, DP, 128", -1.0, 1.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSVML1_1(_mm_atan_pd, "atan, DP, 128", -10, 10, abufdp, vdouble); callFuncSVML1_2(_mm_atan2_pd, "atan2, DP, 128", -10, 10, -10, 10, abufdp, bbufdp, vdouble); } void benchSVML128_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSVML1_1(_mm_sin_ps , "sin, SP, 128", 0, 6.28, abufsp, vfloat); callFuncSVML1_1(_mm_cos_ps , "cos, SP, 128", 0, 6.28, abufsp, vfloat); callFuncSVML1_1(_mm_tan_ps , "tan, SP, 128", 0, 6.28, abufsp, vfloat); callFuncSVML2_1(_mm_sincos_ps, "sincos, SP, 128", 0, 6.28, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSVML1_1(_mm_sin_ps , "sin, SP, 128", 0, 1e+20, abufsp, vfloat); callFuncSVML1_1(_mm_cos_ps , "cos, SP, 128", 0, 1e+20, abufsp, vfloat); callFuncSVML1_1(_mm_tan_ps , "tan, SP, 128", 0, 1e+20, abufsp, vfloat); callFuncSVML2_1(_mm_sincos_ps, "sincos, SP, 128", 0, 1e+20, abufsp, vfloat); } void benchSVML128_SPNontrig() { fillSP(abufsp, 0, 1e+38); callFuncSVML1_1(_mm_log_ps , "log, SP, 128", 0, 1e+38, abufsp, vfloat); if (enableLogExp) { callFuncSVML1_1(_mm_log10_ps, "log10, SP, 128", 0, 1e+38, abufsp, vfloat); //callFuncSVML1_1(_mm_log1p_ps, "log1p, SP, 128", 0, 1e+38, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSVML1_1(_mm_exp_ps , "exp, SP, 128", -100, 100, abufsp, vfloat); callFuncSVML1_1(_mm_exp2_ps , "exp2, SP, 128", -100, 100, abufsp, vfloat); callFuncSVML1_1(_mm_exp10_ps, "exp10, SP, 128", -100, 100, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSVML1_2(_mm_pow_ps, "pow, SP, 128", 
-30, 30, -30, 30, abufsp, bbufsp, vfloat); } fillSP(abufsp, -1.0, 1.0); callFuncSVML1_1(_mm_asin_ps, "asin, SP, 128", -1.0, 1, abufsp, vfloat); callFuncSVML1_1(_mm_acos_ps, "acos, SP, 128", -1.0, 1, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSVML1_1(_mm_atan_ps, "atan, SP, 128", -10, 10, abufsp, vfloat); callFuncSVML1_2(_mm_atan2_ps, "atan2, SP, 128", -10, 10, -10, 10, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void benchSVML128_DPTrig() {} void benchSVML128_DPNontrig() {} void benchSVML128_SPTrig() {} void benchSVML128_SPNontrig() {} #endif // #ifdef ENABLED sleef-3.3.1/src/libm-benchmarks/benchsvml256.c000066400000000000000000000122261333715643700210420ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include uint64_t Sleef_currentTimeMicros(); void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern int enableLogExp; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __AVX__ typedef __m256d vdouble; typedef __m256 vfloat; #define ENABLED #endif #ifdef ENABLED void zeroupper256() { _mm256_zeroupper(); } void benchSVML256_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 6.28, abufdp, vdouble); callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 6.28, abufdp, vdouble); callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 6.28, abufdp, vdouble); callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 6.28, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 1e+6, abufdp, vdouble); callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 1e+6, abufdp, 
vdouble); callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 1e+6, abufdp, vdouble); callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 1e+6, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSVML1_1(_mm256_sin_pd , "sin, DP, 256", 0, 1e+100, abufdp, vdouble); callFuncSVML1_1(_mm256_cos_pd , "cos, DP, 256", 0, 1e+100, abufdp, vdouble); callFuncSVML1_1(_mm256_tan_pd , "tan, DP, 256", 0, 1e+100, abufdp, vdouble); callFuncSVML2_1(_mm256_sincos_pd, "sincos, DP, 256", 0, 1e+100, abufdp, vdouble); } void benchSVML256_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSVML1_1(_mm256_log_pd , "log, DP, 256", 0, 1e+300, abufdp, vdouble); if (enableLogExp) { callFuncSVML1_1(_mm256_log10_pd, "log10, DP, 256", 0, 1e+300, abufdp, vdouble); callFuncSVML1_1(_mm256_log1p_pd, "log1p, DP, 256", 0, 1e+300, abufdp, vdouble); fillDP(abufdp, -700, 700); callFuncSVML1_1(_mm256_exp_pd , "exp, DP, 256", -700, 700, abufdp, vdouble); callFuncSVML1_1(_mm256_exp2_pd , "exp2, DP, 256", -700, 700, abufdp, vdouble); callFuncSVML1_1(_mm256_exp10_pd, "exp10, DP, 256", -700, 700, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSVML1_2(_mm256_pow_pd, "pow, DP, 256", -30, 30, -30, 30, abufdp, bbufdp, vdouble); } fillDP(abufdp, -1.0, 1.0); callFuncSVML1_1(_mm256_asin_pd, "asin, DP, 256", -1.0, 1.0, abufdp, vdouble); callFuncSVML1_1(_mm256_acos_pd, "acos, DP, 256", -1.0, 1.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSVML1_1(_mm256_atan_pd, "atan, DP, 256", -10, 10, abufdp, vdouble); callFuncSVML1_2(_mm256_atan2_pd, "atan2, DP, 256", -10, 10, -10, 10, abufdp, bbufdp, vdouble); } void benchSVML256_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSVML1_1(_mm256_sin_ps , "sin, SP, 256", 0, 6.28, abufsp, vfloat); callFuncSVML1_1(_mm256_cos_ps , "cos, SP, 256", 0, 6.28, abufsp, vfloat); callFuncSVML1_1(_mm256_tan_ps , "tan, SP, 256", 0, 6.28, abufsp, vfloat); callFuncSVML2_1(_mm256_sincos_ps, "sincos, SP, 256", 0, 6.28, abufsp, vfloat); 
fillSP(abufsp, 0, 1e+20); callFuncSVML1_1(_mm256_sin_ps , "sin, SP, 256", 0, 1e+20, abufsp, vfloat); callFuncSVML1_1(_mm256_cos_ps , "cos, SP, 256", 0, 1e+20, abufsp, vfloat); callFuncSVML1_1(_mm256_tan_ps , "tan, SP, 256", 0, 1e+20, abufsp, vfloat); callFuncSVML2_1(_mm256_sincos_ps, "sincos, SP, 256", 0, 1e+20, abufsp, vfloat); } void benchSVML256_SPNontrig() { fillSP(abufsp, 0, 1e+38); callFuncSVML1_1(_mm256_log_ps , "log, SP, 256", 0, 1e+38, abufsp, vfloat); if (enableLogExp) { callFuncSVML1_1(_mm256_log10_ps, "log10, SP, 256", 0, 1e+38, abufsp, vfloat); //callFuncSVML1_1(_mm256_log1p_ps, "log1p, SP, 256", 0, 1e+38, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSVML1_1(_mm256_exp_ps , "exp, SP, 256", -100, 100, abufsp, vfloat); callFuncSVML1_1(_mm256_exp2_ps , "exp2, SP, 256", -100, 100, abufsp, vfloat); callFuncSVML1_1(_mm256_exp10_ps, "exp10, SP, 256", -100, 100, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSVML1_2(_mm256_pow_ps, "pow, SP, 256", -30, 30, -30, 30, abufsp, bbufsp, vfloat); } fillSP(abufsp, -1.0, 1.0); callFuncSVML1_1(_mm256_asin_ps, "asin, SP, 256", -1.0, 1, abufsp, vfloat); callFuncSVML1_1(_mm256_acos_ps, "acos, SP, 256", -1.0, 1, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSVML1_1(_mm256_atan_ps, "atan, SP, 256", -10, 10, abufsp, vfloat); callFuncSVML1_2(_mm256_atan2_ps, "atan2, SP, 256", -10, 10, -10, 10, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void zeroupper256() {} void benchSVML256_DPTrig() {} void benchSVML256_DPNontrig() {} void benchSVML256_SPTrig() {} void benchSVML256_SPNontrig() {} #endif // #ifdef ENABLED sleef-3.3.1/src/libm-benchmarks/benchsvml512.c000066400000000000000000000121301333715643700210270ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include uint64_t Sleef_currentTimeMicros(); void fillDP(double *buf, double min, double max); void fillSP(float *buf, double min, double max); extern char x86BrandString[256], versionString[1024]; extern int veclen; extern int enableLogExp; extern double *abufdp, *bbufdp; extern float *abufsp, *bbufsp; extern FILE *fp; #include "bench.h" #ifdef __AVX512F__ typedef __m512d vdouble; typedef __m512 vfloat; #define ENABLED #endif #ifdef ENABLED void benchSVML512_DPTrig() { fillDP(abufdp, 0, 6.28); callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 6.28, abufdp, vdouble); callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 6.28, abufdp, vdouble); callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 6.28, abufdp, vdouble); callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 6.28, abufdp, vdouble); fillDP(abufdp, 0, 1e+6); callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 1e+6, abufdp, vdouble); callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 1e+6, abufdp, vdouble); callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 1e+6, abufdp, vdouble); callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 1e+6, abufdp, vdouble); fillDP(abufdp, 0, 1e+100); callFuncSVML1_1(_mm512_sin_pd , "sin, DP, 512", 0, 1e+100, abufdp, vdouble); callFuncSVML1_1(_mm512_cos_pd , "cos, DP, 512", 0, 1e+100, abufdp, vdouble); callFuncSVML1_1(_mm512_tan_pd , "tan, DP, 512", 0, 1e+100, abufdp, vdouble); callFuncSVML2_1(_mm512_sincos_pd, "sincos, DP, 512", 0, 1e+100, abufdp, vdouble); } void benchSVML512_DPNontrig() { fillDP(abufdp, 0, 1e+300); callFuncSVML1_1(_mm512_log_pd , "log, DP, 512", 0, 1e+300, abufdp, vdouble); if (enableLogExp) { callFuncSVML1_1(_mm512_log10_pd, "log10, DP, 512", 0, 1e+300, abufdp, vdouble); callFuncSVML1_1(_mm512_log1p_pd, "log1p, DP, 512", 0, 1e+300, abufdp, vdouble); fillDP(abufdp, -700, 700); 
callFuncSVML1_1(_mm512_exp_pd , "exp, DP, 512", -700, 700, abufdp, vdouble); callFuncSVML1_1(_mm512_exp2_pd , "exp2, DP, 512", -700, 700, abufdp, vdouble); callFuncSVML1_1(_mm512_exp10_pd, "exp10, DP, 512", -700, 700, abufdp, vdouble); fillDP(abufdp, -30, 30); fillDP(bbufdp, -30, 30); callFuncSVML1_2(_mm512_pow_pd, "pow, DP, 512", -30, 30, -30, 30, abufdp, bbufdp, vdouble); } fillDP(abufdp, -1.0, 1.0); callFuncSVML1_1(_mm512_asin_pd, "asin, DP, 512", -1.0, 1.0, abufdp, vdouble); callFuncSVML1_1(_mm512_acos_pd, "acos, DP, 512", -1.0, 1.0, abufdp, vdouble); fillDP(abufdp, -10, 10); fillDP(bbufdp, -10, 10); callFuncSVML1_1(_mm512_atan_pd, "atan, DP, 512", -10, 10, abufdp, vdouble); callFuncSVML1_2(_mm512_atan2_pd, "atan2, DP, 512", -10, 10, -10, 10, abufdp, bbufdp, vdouble); } void benchSVML512_SPTrig() { fillSP(abufsp, 0, 6.28); callFuncSVML1_1(_mm512_sin_ps , "sin, SP, 512", 0, 6.28, abufsp, vfloat); callFuncSVML1_1(_mm512_cos_ps , "cos, SP, 512", 0, 6.28, abufsp, vfloat); callFuncSVML1_1(_mm512_tan_ps , "tan, SP, 512", 0, 6.28, abufsp, vfloat); callFuncSVML2_1(_mm512_sincos_ps, "sincos, SP, 512", 0, 6.28, abufsp, vfloat); fillSP(abufsp, 0, 1e+20); callFuncSVML1_1(_mm512_sin_ps , "sin, SP, 512", 0, 1e+20, abufsp, vfloat); callFuncSVML1_1(_mm512_cos_ps , "cos, SP, 512", 0, 1e+20, abufsp, vfloat); callFuncSVML1_1(_mm512_tan_ps , "tan, SP, 512", 0, 1e+20, abufsp, vfloat); callFuncSVML2_1(_mm512_sincos_ps, "sincos, SP, 512", 0, 1e+20, abufsp, vfloat); } void benchSVML512_SPNontrig() { fillSP(abufsp, 0, 1e+38); callFuncSVML1_1(_mm512_log_ps , "log, SP, 512", 0, 1e+38, abufsp, vfloat); if (enableLogExp) { callFuncSVML1_1(_mm512_log10_ps, "log10, SP, 512", 0, 1e+38, abufsp, vfloat); //callFuncSVML1_1(_mm512_log1p_ps, "log1p, SP, 512", 0, 1e+38, abufsp, vfloat); fillSP(abufsp, -100, 100); callFuncSVML1_1(_mm512_exp_ps , "exp, SP, 512", -100, 100, abufsp, vfloat); callFuncSVML1_1(_mm512_exp2_ps , "exp2, SP, 512", -100, 100, abufsp, vfloat); callFuncSVML1_1(_mm512_exp10_ps, 
"exp10, SP, 512", -100, 100, abufsp, vfloat); fillSP(abufsp, -30, 30); fillSP(bbufsp, -30, 30); callFuncSVML1_2(_mm512_pow_ps, "pow, SP, 512", -30, 30, -30, 30, abufsp, bbufsp, vfloat); } fillSP(abufsp, -1.0, 1.0); callFuncSVML1_1(_mm512_asin_ps, "asin, SP, 512", -1.0, 1, abufsp, vfloat); callFuncSVML1_1(_mm512_acos_ps, "acos, SP, 512", -1.0, 1, abufsp, vfloat); fillSP(abufsp, -10, 10); fillSP(bbufsp, -10, 10); callFuncSVML1_1(_mm512_atan_ps, "atan, SP, 512", -10, 10, abufsp, vfloat); callFuncSVML1_2(_mm512_atan2_ps, "atan2, SP, 512", -10, 10, -10, 10, abufsp, bbufsp, vfloat); } #else // #ifdef ENABLED void benchSVML512_DPTrig() {} void benchSVML512_DPNontrig() {} void benchSVML512_SPTrig() {} void benchSVML512_SPNontrig() {} #endif // #ifdef ENABLED sleef-3.3.1/src/libm-benchmarks/measure.sh000066400000000000000000000005071333715643700204540ustar00rootroot00000000000000#!/bin/sh echo read -p "Enter label of measurement(e.g. My desktop PC) : " label if [ -f counter.txt ] then counter=`cat counter.txt` else counter=0 fi echo Measurement in progress. This may take several minutes. for i in $*; do $i "$label" $counter done counter=$((counter+1)) echo $counter > counter.txt sleef-3.3.1/src/libm-tester/000077500000000000000000000000001333715643700156465ustar00rootroot00000000000000sleef-3.3.1/src/libm-tester/CMakeLists.txt000066400000000000000000000175551333715643700204230ustar00rootroot00000000000000# Note: We are assuming SLEEF is the CMake root project. 
# TODO: Remove constraint: do not use CMAKE_BINARY_DIR and CMAKE_SOURCE_DIR
link_directories(${CMAKE_BINARY_DIR}/lib)                 # libsleef
link_directories(${CMAKE_BINARY_DIR}/src/common)          # common.a
include_directories(${CMAKE_BINARY_DIR}/include)          # sleef.h
include_directories(${CMAKE_SOURCE_DIR}/src/libm)         # rename.h
include_directories(${CMAKE_BINARY_DIR}/src/libm/include) # rename headers

# Without MPFR the tester cannot be built here, so look for an installed one.
if(NOT LIB_MPFR)
  find_program(TESTER_COMMAND tester)
endif()

# librt is optional: link nothing when it is not found.
find_library(LIBRT rt)
if(NOT LIBRT)
  set(LIBRT "")
endif()

# NOTE(review): the first command stores the literal text "ORG_CMAKE_C_FLAGS"
# and is overwritten by the string(CONCAT) right below; kept as in the
# original, confirm whether saving/restoring the flags was intended.
set(CMAKE_C_FLAGS ORG_CMAKE_C_FLAGS)
string(CONCAT CMAKE_C_FLAGS ${SLEEF_C_FLAGS})

#

# add_test_iut(<IUT>)
#
# Registers a CTest test named <IUT> that runs the tester program on the
# executable ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/<IUT>.
#
# The tester is ${TARGET_TESTER} when LIB_MPFR is set, otherwise
# ${TESTER_COMMAND} when that was found. When cross compiling with
# NATIVE_BUILD_DIR set, the tester of the native build is used instead.
# If no tester is available, no test is registered.
#
# With EMULATOR set, the emulator command is passed to the tester in front of
# the executable; otherwise the optional SDE / ARMIE arguments are passed.
function(add_test_iut IUT)
  if(LIB_MPFR)
    set(TESTER ${TARGET_TESTER})
  elseif(TESTER_COMMAND)
    set(TESTER ${TESTER_COMMAND})
  endif()
  # When we are crosscompiling using the mkrename* tools from a native
  # build, we use the tester executable from the native build.
  if(CMAKE_CROSSCOMPILING AND NATIVE_BUILD_DIR)
    set(TESTER ${NATIVE_BUILD_DIR}/bin/${TARGET_TESTER})
  endif()
  if(TESTER)
    if(NOT EMULATOR)
      if(SDE_COMMAND)
        set(FLAGS_SDE "--sde" ${SDE_COMMAND})
      else()
        set(FLAGS_SDE)
      endif()
      if(ARMIE_COMMAND)
        set(FLAGS_ARMIE ${ARMIE_COMMAND} -msve-vector-bits=${SVE_VECTOR_BITS})
      else()
        set(FLAGS_ARMIE)
      endif()
      add_test(NAME ${IUT}
        COMMAND ${TESTER} ${FLAGS_SDE} ${FLAGS_ARMIE} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${IUT}
        WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
    else()
      add_test(NAME ${IUT}
        COMMAND ${TESTER} ${EMULATOR} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${IUT}
        WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
    endif()
  endif()
endfunction()

# Compile executable 'iut'
add_executable(${TARGET_IUT} iut.c testerutil.c)
target_compile_definitions(${TARGET_IUT} PRIVATE ${COMMON_TARGET_DEFINITIONS})
target_link_libraries(${TARGET_IUT} ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
set_target_properties(${TARGET_IUT} PROPERTIES C_STANDARD 99)
add_test_iut(${TARGET_IUT})
set(IUT_LIST ${TARGET_IUT})

# Sources shared by all SIMD `iut` executables. The extension of testerutil.c
# is spelled out, as in every other add_executable() call of this file;
# extension-less source names are rejected under policy CMP0115.
set(IUT_SRC iutsimd.c iutsimdmain.c testerutil.c)

# Add vector
# extension `iut`s
#
# test_extension(<SIMD>)
#
# When the compiler supports <SIMD>, builds the `iut` executable for that
# extension from ${IUT_SRC}, registers it with add_test_iut() and appends it to
# IUT_LIST. With MPFR available (and not for NEON32 or MinGW) it also builds
# the tester2<simd>dp / tester2<simd>sp executables.
#
# This is a macro so that the list(APPEND IUT_LIST ...) acts on the caller's
# variable.
macro(test_extension SIMD)
  if(COMPILER_SUPPORTS_${SIMD})
    add_executable(${TARGET_IUT${SIMD}} ${IUT_SRC})
    target_compile_options(${TARGET_IUT${SIMD}}
      PRIVATE ${FLAGS_ENABLE_${SIMD}})
    target_compile_definitions(${TARGET_IUT${SIMD}}
      PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS})
    target_link_libraries(${TARGET_IUT${SIMD}} ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
    add_dependencies(${TARGET_IUT${SIMD}} ${TARGET_HEADERS})
    add_dependencies(${TARGET_IUT${SIMD}} ${TARGET_LIBSLEEF})
    set_target_properties(${TARGET_IUT${SIMD}} PROPERTIES C_STANDARD 99)
    add_test_iut(${TARGET_IUT${SIMD}})
    list(APPEND IUT_LIST ${TARGET_IUT${SIMD}})

    if(LIB_MPFR AND NOT "${SIMD}" STREQUAL "NEON32" AND NOT MINGW)
      # Build tester2 SIMD
      string(TOLOWER ${SIMD} SCSIMD)
      foreach(P dp sp)
        set(T "tester2${SCSIMD}${P}")
        add_executable(${T} tester2simd${P}.c testerutil.c)
        target_compile_options(${T} PRIVATE ${FLAGS_ENABLE_${SIMD}})
        target_compile_definitions(${T}
          PRIVATE ENABLE_${SIMD}=1 USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
        set_target_properties(${T} PROPERTIES C_STANDARD 99)
        target_link_libraries(${T} ${TARGET_LIBSLEEF} ${LIB_MPFR} ${LIBM} ${LIBGMP})
        add_dependencies(${T} ${TARGET_HEADERS})
        add_dependencies(${T} ${TARGET_LIBSLEEF})
        if(MPFR_INCLUDE_DIR)
          target_include_directories(${T} PRIVATE ${MPFR_INCLUDE_DIR})
        endif()
      endforeach()
    endif()
  endif()
endmacro()

foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
  test_extension(${SIMD})
endforeach()

# add_gnuabi_compatibility_test(<SIMD> <MASKED>)
#
# Builds gnuabi_compatibility.c for the vector extension <SIMD> against
# ${TARGET_LIBSLEEFGNUABI} and registers the executable as a CTest test.
# When <MASKED> is true the target gets the `_masked` suffix and is compiled
# with MASKED_GNUABI=1. The test is run through ${EMULATOR} or ${SDE_COMMAND}
# when one of them is set.
function(add_gnuabi_compatibility_test SIMD MASKED)
  if(MASKED)
    set(GNUABI_COMPATIBILITY_TEST gnuabi_compatibility_${SIMD}_masked)
  else()
    set(GNUABI_COMPATIBILITY_TEST gnuabi_compatibility_${SIMD})
  endif()
  add_executable(${GNUABI_COMPATIBILITY_TEST} gnuabi_compatibility.c)
  set_target_properties(${GNUABI_COMPATIBILITY_TEST} PROPERTIES C_STANDARD 99)
  target_compile_options(${GNUABI_COMPATIBILITY_TEST}
    PRIVATE ${FLAGS_ENABLE_${SIMD}})
  if(MASKED)
    target_compile_definitions(${GNUABI_COMPATIBILITY_TEST}
      PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} MASKED_GNUABI=1)
  else()
    target_compile_definitions(${GNUABI_COMPATIBILITY_TEST}
      PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS})
  endif()
  target_link_libraries(${GNUABI_COMPATIBILITY_TEST} ${TARGET_LIBSLEEFGNUABI} ${LIBM})
  # These are linker tests that don't really need to be executed,
  # but seeing them in the report of ctest gives an idea of what
  # has been built for testing.
  # $<TARGET_FILE:...> expands to the full path of the built executable.
  if(EMULATOR)
    add_test(NAME ${GNUABI_COMPATIBILITY_TEST}
      COMMAND ${EMULATOR} $<TARGET_FILE:${GNUABI_COMPATIBILITY_TEST}>
      WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
  elseif(SDE_COMMAND)
    add_test(NAME ${GNUABI_COMPATIBILITY_TEST}
      COMMAND ${SDE_COMMAND} "--" $<TARGET_FILE:${GNUABI_COMPATIBILITY_TEST}>)
  else()
    add_test(NAME ${GNUABI_COMPATIBILITY_TEST}
      COMMAND $<TARGET_FILE:${GNUABI_COMPATIBILITY_TEST}>)
  endif()
endfunction()

if(ENABLE_GNUABI)
  foreach(SIMD ${SLEEF_SUPPORTED_GNUABI_EXTENSIONS})
    if(COMPILER_SUPPORTS_${SIMD})
      # GNUABI compatibility for the unmasked symbols.
      add_gnuabi_compatibility_test(${SIMD} OFF)
      # GNUABI compatibility for the masked symbols.
      if(MKMASKED_PARAMS_GNUABI_${SIMD}_sp)
        add_gnuabi_compatibility_test(${SIMD} ON)
      endif()
    endif()
  endforeach()
endif()

if(SLEEF_ARCH_X86)
  # iutdsp128
  add_executable(iutdsp128 ${IUT_SRC})
  target_compile_definitions(iutdsp128 PRIVATE ENABLE_DSP128=1 ${COMMON_TARGET_DEFINITIONS})
  target_compile_options(iutdsp128 PRIVATE ${FLAGS_ENABLE_SSE2})
  target_link_libraries(iutdsp128 ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
  add_dependencies(iutdsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
  add_test_iut(iutdsp128)
  list(APPEND IUT_LIST iutdsp128)

  # iutdsp256
  add_executable(iutdsp256 ${IUT_SRC})
  target_compile_definitions(iutdsp256 PRIVATE ENABLE_DSP256=1 ${COMMON_TARGET_DEFINITIONS})
  target_compile_options(iutdsp256 PRIVATE ${FLAGS_ENABLE_AVX})
  target_link_libraries(iutdsp256 ${TARGET_LIBSLEEF} ${LIBM} ${LIBRT})
  add_dependencies(iutdsp256 ${TARGET_HEADERS} ${TARGET_LIBSLEEF})
  add_test_iut(iutdsp256)
  list(APPEND IUT_LIST iutdsp256)
endif()

if(LIB_MPFR AND NOT MINGW)
  # Build tester2 scalar
  foreach(P dp sp)
    set(T "tester2${P}")
    add_executable(${T} tester2${P}.c testerutil.c)
    target_compile_definitions(${T} PRIVATE USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
    if(MPFR_INCLUDE_DIR)
      target_include_directories(${T} PRIVATE ${MPFR_INCLUDE_DIR})
    endif()
    target_link_libraries(${T} ${TARGET_LIBSLEEF} ${LIB_MPFR} ${LIBM} ${LIBGMP})
    add_dependencies(${T} ${TARGET_HEADERS})
    add_dependencies(${T} ${TARGET_LIBSLEEF})
  endforeach()

  # No test defined with tester2

  # Compile executable 'tester'
  add_host_executable(${TARGET_TESTER} tester.c testerutil.c)
  if(NOT CMAKE_CROSSCOMPILING)
    target_link_libraries(${TARGET_TESTER} ${LIB_MPFR} ${TARGET_LIBSLEEF} ${LIBM} ${LIBGMP})
    target_compile_definitions(${TARGET_TESTER} PRIVATE USEMPFR=1 ${COMMON_TARGET_DEFINITIONS})
    target_compile_options(${TARGET_TESTER} PRIVATE -Wno-unused-result)
    set_target_properties(${TARGET_TESTER} PROPERTIES C_STANDARD 99)
    if(MPFR_INCLUDE_DIR)
      target_include_directories(${TARGET_TESTER} PRIVATE ${MPFR_INCLUDE_DIR})
    endif()
  endif()
endif()

# Tests depend on the library
add_dependencies(${TARGET_IUT} ${TARGET_HEADERS})
sleef-3.3.1/src/libm-tester/gnuabi_compatibility.c000066400000000000000000000255211333715643700222150ustar00rootroot00000000000000
/// This program makes sure that all the symbols that a
/// GNUABI-compatible compiler (clang or gcc) can generate when
/// vectorizing function calls from `#include <math.h>` are present in
/// `libsleefgnuabi.so`.
///
/// The header `math.h` is not the same on all systems, and different
/// macros can activate different sets of functions. The list provided
/// here should cover the union of all possible systems that we want
/// to support. In particular, the test is checking that the "finite"
/// symbols from `#include <bits/math-finite.h>` are present for
/// those systems supporting them.

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

// Per-extension parameters: the ISA letter used in the mangled name and the
// number of single/double precision lanes in one vector.
#if defined(ENABLE_SSE4) || defined(ENABLE_SSE2)
#define ISA_TOKEN b
#define VLEN_SP 4
#define VLEN_DP 2
#endif /* defined(ENABLE_SSE4) || defined(ENABLE_SSE2) */

#ifdef ENABLE_AVX
#define ISA_TOKEN c
#define VLEN_SP 8
#define VLEN_DP 4
#endif /* ENABLE_AVX */

#ifdef ENABLE_AVX2
#define ISA_TOKEN d
#define VLEN_SP 8
#define VLEN_DP 4
#endif /* ENABLE_AVX2 */

#ifdef ENABLE_AVX512F
#define ISA_TOKEN e
#define VLEN_SP 16
#define VLEN_DP 8
#endif /* ENABLE_AVX512F */

#ifdef ENABLE_ADVSIMD
#define ISA_TOKEN n
#define VLEN_SP 4
#define VLEN_DP 2
#endif /* ENABLE_ADVSIMD */

#ifdef ENABLE_SVE
#include <arm_sve.h>
#define ISA_TOKEN s
#define VLEN_SP (svcntw())
#define VLEN_DP (svcntd())
#define VLA_TOKEN x
#endif /* ENABLE_SVE */

// GNUABI name mangling macro.
#ifndef MASKED_GNUABI
#define __MAKE_FN_NAME(name, t, vl, p) _ZGV##t##N##vl##p##_##name
#else /* MASKED_GNUABI */
#define __MAKE_FN_NAME(name, t, vl, p) _ZGV##t##M##vl##p##_##name
#endif /* MASKED_GNUABI */

// Level-1 expansion macros for declaration and call.
The signature of // each function has three input paramters to avoid segfaults of // sincos-like functions that are effectively loading data from // memory. #define __DECLARE(name, t, vl, p) \ void __MAKE_FN_NAME(name, t, vl, p)(int *, int *, int *) #define __CALL(name, t, vl, p) __MAKE_FN_NAME(name, t, vl, p)(b0, b1, b2) // Make sure that the architectural macros are defined for each vector // extension. #ifndef ISA_TOKEN #error "Missing ISA token" #endif #ifndef VLEN_DP #error "Missing VLEN_DP" #endif #ifndef VLEN_DP #error "Missing VLEN_SP" #endif #if defined(ENABLE_SVE) && !defined(VLA_TOKEN) #error "Missing VLA_TOKEN" #endif /* defined(ENABLE_SVE) && !defined(VLA_TOKEN) */ // Declaration and call, first level expantion to pick up the // ISA_TOKEN and VLEN_* architectural macros. #ifndef ENABLE_SVE #define DECLARE_DP(name, p) __DECLARE(name, ISA_TOKEN, VLEN_DP, p) #define CALL_DP(name, p) __CALL(name, ISA_TOKEN, VLEN_DP, p) #else /* ENABLE_SVE */ #define DECLARE_DP(name, p) __DECLARE(name, ISA_TOKEN, VLA_TOKEN, p) #define CALL_DP(name, p) __CALL(name, ISA_TOKEN, VLA_TOKEN, p) #endif /* ENABLE_SVE */ // Douple precision function declarations. 
DECLARE_DP(__acos_finite, v); DECLARE_DP(__acosh_finite, v); DECLARE_DP(__asin_finite, v); DECLARE_DP(__atan2_finite, vv); DECLARE_DP(__atanh_finite, v); DECLARE_DP(__cosh_finite, v); DECLARE_DP(__exp10_finite, v); DECLARE_DP(__exp2_finite, v); DECLARE_DP(__exp_finite, v); DECLARE_DP(__fmod_finite, vv); DECLARE_DP(__modf_finite, vl8); DECLARE_DP(__hypot_finite, vv); DECLARE_DP(__log10_finite, v); // DECLARE_DP(__log2_finite,v); DECLARE_DP(__log_finite, v); DECLARE_DP(__pow_finite, vv); DECLARE_DP(__sinh_finite, v); DECLARE_DP(__sqrt_finite, v); DECLARE_DP(acos, v); DECLARE_DP(acosh, v); DECLARE_DP(asin, v); DECLARE_DP(asinh, v); DECLARE_DP(atan, v); DECLARE_DP(atan2, vv); DECLARE_DP(__atan2_finite, vv); DECLARE_DP(atanh, v); DECLARE_DP(cbrt, v); DECLARE_DP(ceil, v); DECLARE_DP(copysign, vv); DECLARE_DP(cos, v); DECLARE_DP(cosh, v); DECLARE_DP(cospi, v); DECLARE_DP(erf, v); DECLARE_DP(erfc, v); DECLARE_DP(exp, v); DECLARE_DP(exp10, v); DECLARE_DP(exp2, v); DECLARE_DP(expfrexp, v); DECLARE_DP(expm1, v); DECLARE_DP(fabs, v); DECLARE_DP(fdim, vv); DECLARE_DP(floor, v); DECLARE_DP(fma, vvv); DECLARE_DP(fmax, vv); DECLARE_DP(fmin, vv); DECLARE_DP(fmod, vv); DECLARE_DP(frfrexp, v); DECLARE_DP(hypot, vv); DECLARE_DP(ilogb, v); DECLARE_DP(ldexp, vv); DECLARE_DP(lgamma, v); DECLARE_DP(log, v); DECLARE_DP(log10, v); DECLARE_DP(log1p, v); DECLARE_DP(log2, v); DECLARE_DP(modf, vl8); DECLARE_DP(nextafter, vv); DECLARE_DP(pow, vv); DECLARE_DP(rint, v); DECLARE_DP(round, v); DECLARE_DP(sin, v); DECLARE_DP(sincos, vl8l8); DECLARE_DP(sincospi, vl8l8); DECLARE_DP(sinh, v); DECLARE_DP(sinpi, v); DECLARE_DP(sqrt, v); DECLARE_DP(tan, v); DECLARE_DP(tanh, v); DECLARE_DP(tgamma, v); DECLARE_DP(trunc, v); #ifndef ENABLE_SVE #define DECLARE_SP(name, p) __DECLARE(name, ISA_TOKEN, VLEN_SP, p) #define CALL_SP(name, p) __CALL(name, ISA_TOKEN, VLEN_SP, p) #else /* ENABLE_SVE */ #define DECLARE_SP(name, p) __DECLARE(name, ISA_TOKEN, VLA_TOKEN, p) #define CALL_SP(name, p) __CALL(name, ISA_TOKEN, 
VLA_TOKEN, p) #endif /* ENABLE_SVE */ // Single precision function declarations. DECLARE_SP(__acosf_finite, v); DECLARE_SP(__acoshf_finite, v); DECLARE_SP(__asinf_finite, v); DECLARE_SP(__atan2f_finite, vv); DECLARE_SP(__atanhf_finite, v); DECLARE_SP(__coshf_finite, v); DECLARE_SP(__exp10f_finite, v); DECLARE_SP(__exp2f_finite, v); DECLARE_SP(__expf_finite, v); DECLARE_SP(__fmodf_finite, vv); DECLARE_SP(__modff_finite, vl4); DECLARE_SP(__hypotf_finite, vv); DECLARE_SP(__log10f_finite, v); // DECLARE_SP(__log2f_finite,v); DECLARE_SP(__logf_finite, v); DECLARE_SP(__powf_finite, vv); DECLARE_SP(__sinhf_finite, v); DECLARE_SP(__sqrtf_finite, v); DECLARE_SP(acosf, v); DECLARE_SP(acoshf, v); DECLARE_SP(asinf, v); DECLARE_SP(asinhf, v); DECLARE_SP(atanf, v); DECLARE_SP(atan2f, vv); DECLARE_SP(atanhf, v); DECLARE_SP(cbrtf, v); DECLARE_SP(ceilf, v); DECLARE_SP(copysignf, vv); DECLARE_SP(cosf, v); DECLARE_SP(coshf, v); DECLARE_SP(cospif, v); DECLARE_SP(erff, v); DECLARE_SP(erfcf, v); DECLARE_SP(expf, v); DECLARE_SP(exp10f, v); DECLARE_SP(exp2f, v); DECLARE_SP(expm1f, v); DECLARE_SP(fabsf, v); DECLARE_SP(fdimf, vv); DECLARE_SP(floorf, v); DECLARE_SP(fmaf, vvv); DECLARE_SP(fmaxf, vv); DECLARE_SP(fminf, vv); DECLARE_SP(fmodf, vv); DECLARE_SP(frfrexpf, v); DECLARE_SP(hypotf, vv); #ifndef ENABLE_AVX DECLARE_SP(expfrexpf, v); DECLARE_SP(ilogbf, v); #endif DECLARE_SP(ldexpf, vv); DECLARE_SP(lgammaf, v); DECLARE_SP(logf, v); DECLARE_SP(log10f, v); DECLARE_SP(log1pf, v); DECLARE_SP(log2f, v); DECLARE_SP(modff, vl4); DECLARE_SP(nextafterf, vv); DECLARE_SP(powf, vv); DECLARE_SP(rintf, v); DECLARE_SP(roundf, v); DECLARE_SP(sinf, v); DECLARE_SP(sincosf, vl4l4); DECLARE_SP(sincospif, vl4l4); DECLARE_SP(sinhf, v); DECLARE_SP(sinpif, v); DECLARE_SP(sqrtf, v); DECLARE_SP(tanf, v); DECLARE_SP(tanhf, v); DECLARE_SP(tgammaf, v); DECLARE_SP(truncf, v); static jmp_buf sigjmp; static void sighandler(int signum) { longjmp(sigjmp, 1); } int detectFeature() { signal(SIGILL, sighandler); if 
(setjmp(sigjmp) == 0) { int b0[VLEN_SP]; int b1[VLEN_SP]; int b2[VLEN_SP]; CALL_DP(__acos_finite, v); signal(SIGILL, SIG_DFL); return 1; } else { signal(SIGILL, SIG_DFL); return 0; } } int main(void) { if (!detectFeature()) { return 0; } // Allocate enough memory to make sure that sincos-like functions can // load a full vector when invoked. All functions must operate on // these variables, which are printed at the end of the execution to // make sure that the compiler doesn't optimize out the calls. int b0[VLEN_SP]; int b1[VLEN_SP]; int b2[VLEN_SP]; // Double precision function call. CALL_DP(__acos_finite, v); CALL_DP(__acosh_finite, v); CALL_DP(__asin_finite, v); CALL_DP(__atan2_finite, vv); CALL_DP(__atanh_finite, v); CALL_DP(__cosh_finite, v); CALL_DP(__exp10_finite, v); CALL_DP(__exp2_finite, v); CALL_DP(__exp_finite, v); CALL_DP(__fmod_finite, vv); CALL_DP(__modf_finite, vl8); CALL_DP(__hypot_finite, vv); CALL_DP(__log10_finite, v); // CALL_DP(__log2_finite,v); CALL_DP(__log_finite, v); CALL_DP(__pow_finite, vv); CALL_DP(__sinh_finite, v); CALL_DP(__sqrt_finite, v); CALL_DP(acos, v); CALL_DP(acosh, v); CALL_DP(asin, v); CALL_DP(asinh, v); CALL_DP(atan, v); CALL_DP(atan2, vv); CALL_DP(atanh, v); CALL_DP(cbrt, v); CALL_DP(ceil, v); CALL_DP(copysign, vv); CALL_DP(cos, v); CALL_DP(cosh, v); CALL_DP(cospi, v); CALL_DP(erf, v); CALL_DP(erfc, v); CALL_DP(exp, v); CALL_DP(exp10, v); CALL_DP(exp2, v); CALL_DP(expfrexp, v); CALL_DP(expm1, v); CALL_DP(fabs, v); CALL_DP(fdim, vv); CALL_DP(floor, v); CALL_DP(fma, vvv); CALL_DP(fmax, vv); CALL_DP(fmin, vv); CALL_DP(fmod, vv); CALL_DP(frfrexp, v); CALL_DP(hypot, vv); CALL_DP(ilogb, v); CALL_DP(ldexp, vv); CALL_DP(lgamma, v); CALL_DP(log, v); CALL_DP(log10, v); CALL_DP(log1p, v); CALL_DP(log2, v); CALL_DP(modf, vl8); CALL_DP(nextafter, vv); CALL_DP(pow, vv); CALL_DP(rint, v); CALL_DP(round, v); CALL_DP(sin, v); CALL_DP(sincos, vl8l8); CALL_DP(sincospi, vl8l8); CALL_DP(sinh, v); CALL_DP(sinpi, v); CALL_DP(sqrt, v); 
CALL_DP(tan, v); CALL_DP(tanh, v); CALL_DP(tgamma, v); CALL_DP(trunc, v); // Single precision function call. CALL_SP(__acosf_finite, v); CALL_SP(__acoshf_finite, v); CALL_SP(__asinf_finite, v); CALL_SP(__atan2f_finite, vv); CALL_SP(__atanhf_finite, v); CALL_SP(__coshf_finite, v); CALL_SP(__exp10f_finite, v); CALL_SP(__exp2f_finite, v); CALL_SP(__expf_finite, v); CALL_SP(__fmodf_finite, vv); CALL_SP(__modff_finite, vl4); CALL_SP(__hypotf_finite, vv); CALL_SP(__log10f_finite, v); // CALL_SP(__log2f_finite,v); CALL_SP(__logf_finite, v); CALL_SP(__powf_finite, vv); CALL_SP(__sinhf_finite, v); CALL_SP(__sqrtf_finite, v); CALL_SP(acosf, v); CALL_SP(acoshf, v); CALL_SP(asinf, v); CALL_SP(asinhf, v); CALL_SP(atanf, v); CALL_SP(atan2f, vv); CALL_SP(atanhf, v); CALL_SP(cbrtf, v); CALL_SP(ceilf, v); CALL_SP(copysignf, vv); CALL_SP(cosf, v); CALL_SP(coshf, v); CALL_SP(cospif, v); CALL_SP(erff, v); CALL_SP(erfcf, v); CALL_SP(expf, v); CALL_SP(exp10f, v); CALL_SP(exp2f, v); CALL_SP(expm1f, v); CALL_SP(fabsf, v); CALL_SP(fdimf, vv); CALL_SP(floorf, v); CALL_SP(fmaf, vvv); CALL_SP(fmaxf, vv); CALL_SP(fminf, vv); CALL_SP(fmodf, vv); CALL_SP(frfrexpf, v); CALL_SP(hypotf, vv); #ifndef ENABLE_AVX CALL_SP(expfrexpf, v); CALL_SP(ilogbf, v); #endif CALL_SP(ldexpf, vv); CALL_SP(lgammaf, v); CALL_SP(logf, v); CALL_SP(log10f, v); CALL_SP(log1pf, v); CALL_SP(log2f, v); CALL_SP(modff, vl4); CALL_SP(nextafterf, vv); CALL_SP(powf, vv); CALL_SP(rintf, v); CALL_SP(roundf, v); CALL_SP(sinf, v); CALL_SP(sincosf, vl4l4); CALL_SP(sincospif, vl4l4); CALL_SP(sinhf, v); CALL_SP(sinpif, v); CALL_SP(sqrtf, v); CALL_SP(tanf, v); CALL_SP(tanhf, v); CALL_SP(tgammaf, v); CALL_SP(truncf, v); // Print the vars to make sure the compiler does not remove the // calls. for (int i = 0; i < VLEN_SP; ++i) printf("%d %d %d\n", b0[i], b1[i], b2[i]); return 0; } sleef-3.3.1/src/libm-tester/iut.c000066400000000000000000000566651333715643700166350ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #define STDIN_FILENO 0 #else #include #include #include #endif #include "sleef.h" #include "testerutil.h" #define DORENAME #include "rename.h" #define BUFSIZE 1024 int main(int argc, char **argv) { char buf[BUFSIZE]; printf("3\n"); fflush(stdout); //fprintf(stderr, "IUT start\n"); for(;;) { if (readln(STDIN_FILENO, buf, BUFSIZE-1) < 1) break; //fprintf(stderr, "iut: got %s\n", buf); if (startsWith(buf, "sin ")) { uint64_t u; sscanf(buf, "sin %" PRIx64, &u); u = d2u(xsin(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sin_u1 ")) { uint64_t u; sscanf(buf, "sin_u1 %" PRIx64, &u); u = d2u(xsin_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cos ")) { uint64_t u; sscanf(buf, "cos %" PRIx64, &u); u = d2u(xcos(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cos_u1 ")) { uint64_t u; sscanf(buf, "cos_u1 %" PRIx64, &u); u = d2u(xcos_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sincos ")) { uint64_t u; sscanf(buf, "sincos %" PRIx64, &u); Sleef_double2 x = xsincos(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "sincos_u1 ")) { uint64_t u; sscanf(buf, "sincos_u1 %" PRIx64, &u); Sleef_double2 x = xsincos_u1(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "sincospi_u05 ")) { uint64_t u; sscanf(buf, "sincospi_u05 %" PRIx64, &u); Sleef_double2 x = xsincospi_u05(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if 
(startsWith(buf, "sincospi_u35 ")) { uint64_t u; sscanf(buf, "sincospi_u35 %" PRIx64, &u); Sleef_double2 x = xsincospi_u35(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "sinpi_u05 ")) { uint64_t u; sscanf(buf, "sinpi_u05 %" PRIx64, &u); u = d2u(xsinpi_u05(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cospi_u05 ")) { uint64_t u; sscanf(buf, "cospi_u05 %" PRIx64, &u); u = d2u(xcospi_u05(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "tan ")) { uint64_t u; sscanf(buf, "tan %" PRIx64, &u); u = d2u(xtan(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "tan_u1 ")) { uint64_t u; sscanf(buf, "tan_u1 %" PRIx64, &u); u = d2u(xtan_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "asin ")) { uint64_t u; sscanf(buf, "asin %" PRIx64, &u); u = d2u(xasin(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "acos ")) { uint64_t u; sscanf(buf, "acos %" PRIx64, &u); u = d2u(xacos(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atan ")) { uint64_t u; sscanf(buf, "atan %" PRIx64, &u); u = d2u(xatan(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log ")) { uint64_t u; sscanf(buf, "log %" PRIx64, &u); u = d2u(xlog(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "exp ")) { uint64_t u; sscanf(buf, "exp %" PRIx64, &u); u = d2u(xexp(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atan2 ")) { uint64_t u, v; sscanf(buf, "atan2 %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xatan2(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "asin_u1 ")) { uint64_t u; sscanf(buf, "asin_u1 %" PRIx64, &u); u = d2u(xasin_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "acos_u1 ")) { uint64_t u; sscanf(buf, "acos_u1 %" PRIx64, &u); u = d2u(xacos_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atan_u1 ")) { uint64_t u; sscanf(buf, "atan_u1 %" PRIx64, &u); 
u = d2u(xatan_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atan2_u1 ")) { uint64_t u, v; sscanf(buf, "atan2_u1 %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xatan2_u1(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log_u1 ")) { uint64_t u; sscanf(buf, "log_u1 %" PRIx64, &u); u = d2u(xlog_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "pow ")) { uint64_t u, v; sscanf(buf, "pow %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xpow(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sinh ")) { uint64_t u; sscanf(buf, "sinh %" PRIx64, &u); u = d2u(xsinh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cosh ")) { uint64_t u; sscanf(buf, "cosh %" PRIx64, &u); u = d2u(xcosh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "tanh ")) { uint64_t u; sscanf(buf, "tanh %" PRIx64, &u); u = d2u(xtanh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sinh_u35 ")) { uint64_t u; sscanf(buf, "sinh_u35 %" PRIx64, &u); u = d2u(xsinh_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cosh_u35 ")) { uint64_t u; sscanf(buf, "cosh_u35 %" PRIx64, &u); u = d2u(xcosh_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "tanh_u35 ")) { uint64_t u; sscanf(buf, "tanh_u35 %" PRIx64, &u); u = d2u(xtanh_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "asinh ")) { uint64_t u; sscanf(buf, "asinh %" PRIx64, &u); u = d2u(xasinh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "acosh ")) { uint64_t u; sscanf(buf, "acosh %" PRIx64, &u); u = d2u(xacosh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "atanh ")) { uint64_t u; sscanf(buf, "atanh %" PRIx64, &u); u = d2u(xatanh(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fma ")) { uint64_t u, v, w; sscanf(buf, "fma %" PRIx64 " %" PRIx64 " %" PRIx64, &u, &v, &w); u = d2u(xfma(u2d(u), u2d(v), u2d(w))); printf("%" PRIx64 "\n", 
u); } else if (startsWith(buf, "sqrt ")) { uint64_t u; sscanf(buf, "sqrt %" PRIx64, &u); u = d2u(xsqrt(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sqrt_u05 ")) { uint64_t u; sscanf(buf, "sqrt_u05 %" PRIx64, &u); u = d2u(xsqrt_u05(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sqrt_u35 ")) { uint64_t u; sscanf(buf, "sqrt_u35 %" PRIx64, &u); u = d2u(xsqrt_u35(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cbrt ")) { uint64_t u; sscanf(buf, "cbrt %" PRIx64, &u); u = d2u(xcbrt(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "cbrt_u1 ")) { uint64_t u; sscanf(buf, "cbrt_u1 %" PRIx64, &u); u = d2u(xcbrt_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "exp2 ")) { uint64_t u; sscanf(buf, "exp2 %" PRIx64, &u); u = d2u(xexp2(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "exp10 ")) { uint64_t u; sscanf(buf, "exp10 %" PRIx64, &u); u = d2u(xexp10(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "expm1 ")) { uint64_t u; sscanf(buf, "expm1 %" PRIx64, &u); u = d2u(xexpm1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log10 ")) { uint64_t u; sscanf(buf, "log10 %" PRIx64, &u); u = d2u(xlog10(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log2 ")) { uint64_t u; sscanf(buf, "log2 %" PRIx64, &u); u = d2u(xlog2(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "log1p ")) { uint64_t u; sscanf(buf, "log1p %" PRIx64, &u); u = d2u(xlog1p(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "ldexp ")) { uint64_t u, v; sscanf(buf, "ldexp %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xldexp(u2d(u), (int)u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "hypot_u05 ")) { uint64_t u, v; sscanf(buf, "hypot_u05 %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xhypot_u05(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "hypot_u35 ")) { uint64_t u, v; sscanf(buf, "hypot_u35 %" PRIx64 
" %" PRIx64, &u, &v); u = d2u(xhypot_u35(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "copysign ")) { uint64_t u, v; sscanf(buf, "copysign %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xcopysign(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fmax ")) { uint64_t u, v; sscanf(buf, "fmax %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xfmax(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fmin ")) { uint64_t u, v; sscanf(buf, "fmin %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xfmin(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fdim ")) { uint64_t u, v; sscanf(buf, "fdim %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xfdim(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "nextafter ")) { uint64_t u, v; sscanf(buf, "nextafter %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xnextafter(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fmod ")) { uint64_t u, v; sscanf(buf, "fmod %" PRIx64 " %" PRIx64, &u, &v); u = d2u(xfmod(u2d(u), u2d(v))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "fabs ")) { uint64_t u; sscanf(buf, "fabs %" PRIx64, &u); u = d2u(xfabs(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "trunc ")) { uint64_t u; sscanf(buf, "trunc %" PRIx64, &u); u = d2u(xtrunc(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "floor ")) { uint64_t u; sscanf(buf, "floor %" PRIx64, &u); u = d2u(xfloor(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "ceil ")) { uint64_t u; sscanf(buf, "ceil %" PRIx64, &u); u = d2u(xceil(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "round ")) { uint64_t u; sscanf(buf, "round %" PRIx64, &u); u = d2u(xround(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "rint ")) { uint64_t u; sscanf(buf, "rint %" PRIx64, &u); u = d2u(xrint(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "frfrexp ")) { uint64_t u; sscanf(buf, 
"frfrexp %" PRIx64, &u); u = d2u(xfrfrexp(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "modf ")) { uint64_t u; sscanf(buf, "modf %" PRIx64, &u); Sleef_double2 x = xmodf(u2d(u)); printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y)); } else if (startsWith(buf, "tgamma_u1 ")) { uint64_t u; sscanf(buf, "tgamma_u1 %" PRIx64, &u); u = d2u(xtgamma_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "lgamma_u1 ")) { uint64_t u; sscanf(buf, "lgamma_u1 %" PRIx64, &u); u = d2u(xlgamma_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "erf_u1 ")) { uint64_t u; sscanf(buf, "erf_u1 %" PRIx64, &u); u = d2u(xerf_u1(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "erfc_u15 ")) { uint64_t u; sscanf(buf, "erfc_u15 %" PRIx64, &u); u = d2u(xerfc_u15(u2d(u))); printf("%" PRIx64 "\n", u); } else if (startsWith(buf, "sinf ")) { uint32_t u; sscanf(buf, "sinf %x", &u); u = f2u(xsinf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cosf ")) { uint32_t u; sscanf(buf, "cosf %x", &u); u = f2u(xcosf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sincosf ")) { uint32_t u; sscanf(buf, "sincosf %x", &u); Sleef_float2 x = xsincosf(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "tanf ")) { uint32_t u; sscanf(buf, "tanf %x", &u); u = f2u(xtanf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "asinf ")) { uint32_t u; sscanf(buf, "asinf %x", &u); u = f2u(xasinf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "acosf ")) { uint32_t u; sscanf(buf, "acosf %x", &u); u = f2u(xacosf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atanf ")) { uint32_t u; sscanf(buf, "atanf %x", &u); u = f2u(xatanf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atan2f ")) { uint32_t u, v; sscanf(buf, "atan2f %x %x", &u, &v); u = f2u(xatan2f(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "logf ")) { uint32_t u; sscanf(buf, "logf %x", &u); u = f2u(xlogf(u2f(u))); 
printf("%x\n", u); } else if (startsWith(buf, "expf ")) { uint32_t u; sscanf(buf, "expf %x", &u); u = f2u(xexpf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cbrtf ")) { uint32_t u; sscanf(buf, "cbrtf %x", &u); u = f2u(xcbrtf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sqrtf ")) { uint32_t u; sscanf(buf, "sqrtf %x", &u); u = f2u(xsqrtf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sqrtf_u05 ")) { uint32_t u; sscanf(buf, "sqrtf_u05 %x", &u); u = f2u(xsqrtf_u05(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sqrtf_u35 ")) { uint32_t u; sscanf(buf, "sqrtf_u35 %x", &u); u = f2u(xsqrtf_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "ldexpf ")) { uint32_t u, v; sscanf(buf, "ldexpf %x %x", &u, &v); u = f2u(xldexpf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "powf ")) { uint32_t u, v; sscanf(buf, "powf %x %x", &u, &v); u = f2u(xpowf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "sinhf ")) { uint32_t u; sscanf(buf, "sinhf %x", &u); u = f2u(xsinhf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "coshf ")) { uint32_t u; sscanf(buf, "coshf %x", &u); u = f2u(xcoshf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "tanhf ")) { uint32_t u; sscanf(buf, "tanhf %x", &u); u = f2u(xtanhf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sinhf_u35 ")) { uint32_t u; sscanf(buf, "sinhf_u35 %x", &u); u = f2u(xsinhf_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "coshf_u35 ")) { uint32_t u; sscanf(buf, "coshf_u35 %x", &u); u = f2u(xcoshf_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "tanhf_u35 ")) { uint32_t u; sscanf(buf, "tanhf_u35 %x", &u); u = f2u(xtanhf_u35(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "asinhf ")) { uint32_t u; sscanf(buf, "asinhf %x", &u); u = f2u(xasinhf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "acoshf ")) { uint32_t u; sscanf(buf, "acoshf %x", &u); u = f2u(xacoshf(u2f(u))); printf("%x\n", u); } else 
if (startsWith(buf, "atanhf ")) { uint32_t u; sscanf(buf, "atanhf %x", &u); u = f2u(xatanhf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "exp2f ")) { uint32_t u; sscanf(buf, "exp2f %x", &u); u = f2u(xexp2f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "exp10f ")) { uint32_t u; sscanf(buf, "exp10f %x", &u); u = f2u(xexp10f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "expm1f ")) { uint32_t u; sscanf(buf, "expm1f %x", &u); u = f2u(xexpm1f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "log10f ")) { uint32_t u; sscanf(buf, "log10f %x", &u); u = f2u(xlog10f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "log2f ")) { uint32_t u; sscanf(buf, "log2f %x", &u); u = f2u(xlog2f(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "log1pf ")) { uint32_t u; sscanf(buf, "log1pf %x", &u); u = f2u(xlog1pf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sinf_u1 ")) { uint32_t u; sscanf(buf, "sinf_u1 %x", &u); u = f2u(xsinf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cosf_u1 ")) { uint32_t u; sscanf(buf, "cosf_u1 %x", &u); u = f2u(xcosf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "sincosf_u1 ")) { uint32_t u; sscanf(buf, "sincosf_u1 %x", &u); Sleef_float2 x = xsincosf_u1(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "sincospif_u05 ")) { uint32_t u; sscanf(buf, "sincospif_u05 %x", &u); Sleef_float2 x = xsincospif_u05(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "sincospif_u35 ")) { uint32_t u; sscanf(buf, "sincospif_u35 %x", &u); Sleef_float2 x = xsincospif_u35(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "sinpif_u05 ")) { uint32_t u; sscanf(buf, "sinpif_u05 %x", &u); u = f2u(xsinpif_u05(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cospif_u05 ")) { uint32_t u; sscanf(buf, "cospif_u05 %x", &u); u = f2u(xcospif_u05(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "tanf_u1 ")) { uint32_t 
u; sscanf(buf, "tanf_u1 %x", &u); u = f2u(xtanf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "asinf_u1 ")) { uint32_t u; sscanf(buf, "asinf_u1 %x", &u); u = f2u(xasinf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "acosf_u1 ")) { uint32_t u; sscanf(buf, "acosf_u1 %x", &u); u = f2u(xacosf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atanf_u1 ")) { uint32_t u; sscanf(buf, "atanf_u1 %x", &u); u = f2u(xatanf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "atan2f_u1 ")) { uint32_t u, v; sscanf(buf, "atan2f_u1 %x %x", &u, &v); u = f2u(xatan2f_u1(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "logf_u1 ")) { uint32_t u; sscanf(buf, "logf_u1 %x", &u); u = f2u(xlogf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "cbrtf_u1 ")) { uint32_t u; sscanf(buf, "cbrtf_u1 %x", &u); u = f2u(xcbrtf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "ilogb ")) { uint64_t u; int i; sscanf(buf, "ilogb %" PRIx64, &u); i = xilogb(u2d(u)); printf("%d\n", i); } else if (startsWith(buf, "ilogbf ")) { uint32_t u; int i; sscanf(buf, "ilogbf %x", &u); i = xilogbf(u2f(u)); printf("%d\n", i); } else if (startsWith(buf, "hypotf_u05 ")) { uint32_t u, v; sscanf(buf, "hypotf_u05 %x %x", &u, &v); u = f2u(xhypotf_u05(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "hypotf_u35 ")) { uint32_t u, v; sscanf(buf, "hypotf_u35 %x %x", &u, &v); u = f2u(xhypotf_u35(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "copysignf ")) { uint32_t u, v; sscanf(buf, "copysignf %x %x", &u, &v); u = f2u(xcopysignf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fmaxf ")) { uint32_t u, v; sscanf(buf, "fmaxf %x %x", &u, &v); u = f2u(xfmaxf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fminf ")) { uint32_t u, v; sscanf(buf, "fminf %x %x", &u, &v); u = f2u(xfminf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fdimf ")) { uint32_t u, v; sscanf(buf, "fdimf %x 
%x", &u, &v); u = f2u(xfdimf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "nextafterf ")) { uint32_t u, v; sscanf(buf, "nextafterf %x %x", &u, &v); u = f2u(xnextafterf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fmodf ")) { uint32_t u, v; sscanf(buf, "fmodf %x %x", &u, &v); u = f2u(xfmodf(u2f(u), u2f(v))); printf("%x\n", u); } else if (startsWith(buf, "fabsf ")) { uint32_t u; sscanf(buf, "fabsf %x", &u); u = f2u(xfabsf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "truncf ")) { uint32_t u; sscanf(buf, "truncf %x", &u); u = f2u(xtruncf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "floorf ")) { uint32_t u; sscanf(buf, "floorf %x", &u); u = f2u(xfloorf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "ceilf ")) { uint32_t u; sscanf(buf, "ceilf %x", &u); u = f2u(xceilf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "roundf ")) { uint32_t u; sscanf(buf, "roundf %x", &u); u = f2u(xroundf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "rintf ")) { uint32_t u; sscanf(buf, "rintf %x", &u); u = f2u(xrintf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "frfrexpf ")) { uint32_t u; sscanf(buf, "frfrexpf %x", &u); u = f2u(xfrfrexpf(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "modff ")) { uint32_t u; sscanf(buf, "modff %x", &u); Sleef_float2 x = xmodff(u2f(u)); printf("%x %x\n", f2u(x.x), f2u(x.y)); } else if (startsWith(buf, "tgammaf_u1 ")) { uint32_t u; sscanf(buf, "tgammaf_u1 %x", &u); u = f2u(xtgammaf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "lgammaf_u1 ")) { uint32_t u; sscanf(buf, "lgammaf_u1 %x", &u); u = f2u(xlgammaf_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "erff_u1 ")) { uint32_t u; sscanf(buf, "erff_u1 %x", &u); u = f2u(xerff_u1(u2f(u))); printf("%x\n", u); } else if (startsWith(buf, "erfcf_u15 ")) { uint32_t u; sscanf(buf, "erfcf_u15 %x", &u); u = f2u(xerfcf_u15(u2f(u))); printf("%x\n", u); } else { break; } fflush(stdout); } 
return 0; } sleef-3.3.1/src/libm-tester/iutsimd.c000066400000000000000000000344311333715643700174750ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #if defined(_MSC_VER) #define STDIN_FILENO 0 #else #include #include #include #endif #include "misc.h" #include "sleef.h" #include "testerutil.h" #define DORENAME #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #include "renamesse2.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #include "helpersse2.h" #include "renamesse4.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #include "renameavx.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #include "renamefma4.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #include "renameavx2.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #include "renameavx2128.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #include "renameavx512f.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_VECEXT #define CONFIG 1 #include "helpervecext.h" #include "norename.h" #endif #ifdef ENABLE_PUREC #define CONFIG 1 
#include "helperpurec.h" #include "norename.h" #endif #ifdef ENABLE_NEON32 #define CONFIG 1 #include "helperneon32.h" #include "renameneon32.h" typedef Sleef_float32x4_t_2 vfloat2; #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #include "renameadvsimd.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif #ifdef ENABLE_DSP128 #define CONFIG 2 #include "helpersse2.h" #include "renamedsp128.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_SVE #define CONFIG 1 #include "helpersve.h" #ifdef DORENAME #include "renamesve.h" typedef Sleef_svfloat64_t_2 vdouble2; typedef Sleef_svfloat32_t_2 vfloat2; #endif #endif #ifdef ENABLE_DSP256 #define CONFIG 1 #include "helperavx.h" #include "renamedsp256.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #include "renamevsx.h" typedef Sleef_vector_double_2 vdouble2; typedef Sleef_vector_float_2 vfloat2; #endif // #ifdef ENABLE_DP int check_featureDP() { if (vavailability_i(1) == 0) return 0; double s[VECTLENDP]; int i; for(i=0;i #include #include #include #include static jmp_buf sigjmp; int do_test(int argc, char **argv); int check_featureDP(); int check_featureSP(); static void sighandler(int signum) { longjmp(sigjmp, 1); } int detectFeatureDP() { signal(SIGILL, sighandler); if (setjmp(sigjmp) == 0) { int r = check_featureDP(); signal(SIGILL, SIG_DFL); return r; } else { signal(SIGILL, SIG_DFL); return 0; } } int detectFeatureSP() { signal(SIGILL, sighandler); if (setjmp(sigjmp) == 0) { int r = check_featureSP(); signal(SIGILL, SIG_DFL); return r; } else { signal(SIGILL, SIG_DFL); return 0; } } int main(int argc, char **argv) { if (!detectFeatureDP() && !detectFeatureSP()) { fprintf(stderr, "\n\n***** This host does not support the necessary CPU features to execute this program *****\n\n\n"); printf("0\n"); fclose(stdout); exit(-1); } return 
do_test(argc, argv); } sleef-3.3.1/src/libm-tester/tester.c000066400000000000000000005375671333715643700173470ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // This define is needed to prevent the `execvpe` function to raise a // warning at compile time. For more information, see // https://linux.die.net/man/3/execvp. #define _GNU_SOURCE #include #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include #include #include #include #include #include "misc.h" #include "testerutil.h" #ifndef NANf #define NANf ((float)NAN) #endif void stop(char *mes) { fprintf(stderr, "%s\n", mes); exit(-1); } int ptoc[2], ctop[2]; int pid; extern char **environ; void startChild(const char *path, char *const argv[]) { pipe(ptoc); pipe(ctop); pid = fork(); assert(pid != -1); if (pid == 0) { // child process char buf0[1], buf1[1]; int i; close(ptoc[1]); close(ctop[0]); fflush(stdin); fflush(stdout); i = dup2(ptoc[0], fileno(stdin)); assert(i != -1); i = dup2(ctop[1], fileno(stdout)); assert(i != -1); setvbuf(stdin, buf0, _IONBF,0); setvbuf(stdout, buf1, _IONBF,0); fflush(stdin); fflush(stdout); #if !defined(__APPLE__) && !defined(__FreeBSD__) execvpe(path, argv, environ); #else execvp(path, argv); #endif fprintf(stderr, "execvp in startChild : %s\n", strerror(errno)); exit(-1); } // parent process close(ptoc[0]); close(ctop[1]); } // #define child_d_d(funcStr, arg) do { \ char str[256]; \ uint64_t u; \ sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (readln(ctop[0], str, 255) < 1) stop("child " funcStr); \ sscanf(str, "%" PRIx64, &u); \ return u2d(u); \ } 
while(0) #define child_d2_d(funcStr, arg) do { \ char str[256]; \ uint64_t u, v; \ sprintf(str, funcStr " %" PRIx64 "\n", d2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (readln(ctop[0], str, 255) < 1) stop("child " funcStr); \ sscanf(str, "%" PRIx64 " %" PRIx64, &u, &v); \ Sleef_double2 ret; \ ret.x = u2d(u); \ ret.y = u2d(v); \ return ret; \ } while(0) #define child_d_d_d(funcStr, arg1, arg2) do { \ char str[256]; \ uint64_t u; \ sprintf(str, funcStr " %" PRIx64 " %" PRIx64 "\n", d2u(arg1), d2u(arg2)); \ write(ptoc[1], str, strlen(str)); \ if (readln(ctop[0], str, 255) < 1) stop("child " funcStr); \ sscanf(str, "%" PRIx64, &u); \ return u2d(u); \ } while(0) double child_sin(double x) { child_d_d("sin", x); } double child_cos(double x) { child_d_d("cos", x); } double child_tan(double x) { child_d_d("tan", x); } double child_asin(double x) { child_d_d("asin", x); } double child_acos(double x) { child_d_d("acos", x); } double child_atan(double x) { child_d_d("atan", x); } double child_log(double x) { child_d_d("log", x); } double child_exp(double x) { child_d_d("exp", x); } double child_cbrt(double x) { child_d_d("cbrt", x); } double child_atan2(double y, double x) { child_d_d_d("atan2", y, x); } Sleef_double2 child_sincos(double x) { child_d2_d("sincos", x); } double child_sin_u1(double x) { child_d_d("sin_u1", x); } double child_cos_u1(double x) { child_d_d("cos_u1", x); } double child_tan_u1(double x) { child_d_d("tan_u1", x); } double child_asin_u1(double x) { child_d_d("asin_u1", x); } double child_acos_u1(double x) { child_d_d("acos_u1", x); } double child_atan_u1(double x) { child_d_d("atan_u1", x); } double child_log_u1(double x) { child_d_d("log_u1", x); } double child_exp_u1(double x) { child_d_d("exp_u1", x); } double child_cbrt_u1(double x) { child_d_d("cbrt_u1", x); } double child_atan2_u1(double y, double x) { child_d_d_d("atan2_u1", y, x); } Sleef_double2 child_sincos_u1(double x) { child_d2_d("sincos_u1", x); } double child_pow(double x, double 
y) { child_d_d_d("pow", x, y); } double child_sqrt(double x) { child_d_d("sqrt", x); } double child_sqrt_u05(double x) { child_d_d("sqrt_u05", x); } double child_sqrt_u35(double x) { child_d_d("sqrt_u35", x); } double child_sinh(double x) { child_d_d("sinh", x); } double child_cosh(double x) { child_d_d("cosh", x); } double child_tanh(double x) { child_d_d("tanh", x); } double child_sinh_u35(double x) { child_d_d("sinh_u35", x); } double child_cosh_u35(double x) { child_d_d("cosh_u35", x); } double child_tanh_u35(double x) { child_d_d("tanh_u35", x); } double child_asinh(double x) { child_d_d("asinh", x); } double child_acosh(double x) { child_d_d("acosh", x); } double child_atanh(double x) { child_d_d("atanh", x); } double child_log10(double x) { child_d_d("log10", x); } double child_log2(double x) { child_d_d("log2", x); } double child_log1p(double x) { child_d_d("log1p", x); } double child_exp2(double x) { child_d_d("exp2", x); } double child_exp10(double x) { child_d_d("exp10", x); } double child_expm1(double x) { child_d_d("expm1", x); } Sleef_double2 child_sincospi_u05(double x) { child_d2_d("sincospi_u05", x); } Sleef_double2 child_sincospi_u35(double x) { child_d2_d("sincospi_u35", x); } double child_sinpi_u05(double x) { child_d_d("sinpi_u05", x); } double child_cospi_u05(double x) { child_d_d("cospi_u05", x); } double child_hypot_u05(double x, double y) { child_d_d_d("hypot_u05", x, y); } double child_hypot_u35(double x, double y) { child_d_d_d("hypot_u35", x, y); } double child_copysign(double x, double y) { child_d_d_d("copysign", x, y); } double child_fmax(double x, double y) { child_d_d_d("fmax", x, y); } double child_fmin(double x, double y) { child_d_d_d("fmin", x, y); } double child_fdim(double x, double y) { child_d_d_d("fdim", x, y); } double child_nextafter(double x, double y) { child_d_d_d("nextafter", x, y); } double child_fmod(double x, double y) { child_d_d_d("fmod", x, y); } double child_fabs(double x) { child_d_d("fabs", x); } double 
child_trunc(double x) { child_d_d("trunc", x); } double child_floor(double x) { child_d_d("floor", x); } double child_ceil(double x) { child_d_d("ceil", x); } double child_round(double x) { child_d_d("round", x); } double child_rint(double x) { child_d_d("rint", x); } double child_frfrexp(double x) { child_d_d("frfrexp", x); } Sleef_double2 child_modf(double x) { child_d2_d("modf", x); } double child_tgamma_u1(double x) { child_d_d("tgamma_u1", x); } double child_lgamma_u1(double x) { child_d_d("lgamma_u1", x); } double child_erf_u1(double x) { child_d_d("erf_u1", x); } double child_erfc_u15(double x) { child_d_d("erfc_u15", x); } // double child_ldexp(double x, int q) { char str[256]; uint64_t u; sprintf(str, "ldexp %" PRIx64 " %" PRIx64 "\n", d2u(x), d2u(q)); write(ptoc[1], str, strlen(str)); if (readln(ctop[0], str, 255) < 1) stop("child_ldexp"); sscanf(str, "%" PRIx64, &u); return u2d(u); } int child_ilogb(double x) { char str[256]; int i; sprintf(str, "ilogb %" PRIx64 "\n", d2u(x)); write(ptoc[1], str, strlen(str)); if (readln(ctop[0], str, 255) < 1) stop("child_ilogb"); sscanf(str, "%d", &i); return i; } // #define child_f_f(funcStr, arg) do { \ char str[256]; \ uint32_t u; \ sprintf(str, funcStr " %x\n", f2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (readln(ctop[0], str, 255) < 1) stop("child " funcStr); \ sscanf(str, "%x", &u); \ return u2f(u); \ } while(0) #define child_f2_f(funcStr, arg) do { \ char str[256]; \ uint32_t u, v; \ sprintf(str, funcStr " %x\n", f2u(arg)); \ write(ptoc[1], str, strlen(str)); \ if (readln(ctop[0], str, 255) < 1) stop("child " funcStr); \ sscanf(str, "%x %x", &u, &v); \ Sleef_float2 ret; \ ret.x = u2f(u); \ ret.y = u2f(v); \ return ret; \ } while(0) #define child_f_f_f(funcStr, arg1, arg2) do { \ char str[256]; \ uint32_t u; \ sprintf(str, funcStr " %x %x\n", f2u(arg1), f2u(arg2)); \ write(ptoc[1], str, strlen(str)); \ if (readln(ctop[0], str, 255) < 1) stop("child " funcStr); \ sscanf(str, "%x", &u); \ return u2f(u); \ } 
while(0) float child_sinf(float x) { child_f_f("sinf", x); } float child_cosf(float x) { child_f_f("cosf", x); } float child_tanf(float x) { child_f_f("tanf", x); } float child_asinf(float x) { child_f_f("asinf", x); } float child_acosf(float x) { child_f_f("acosf", x); } float child_atanf(float x) { child_f_f("atanf", x); } float child_logf(float x) { child_f_f("logf", x); } float child_expf(float x) { child_f_f("expf", x); } float child_cbrtf(float x) { child_f_f("cbrtf", x); } float child_atan2f(float y, float x) { child_f_f_f("atan2f", y, x); } Sleef_float2 child_sincosf(float x) { child_f2_f("sincosf", x); } float child_sinf_u1(float x) { child_f_f("sinf_u1", x); } float child_cosf_u1(float x) { child_f_f("cosf_u1", x); } float child_tanf_u1(float x) { child_f_f("tanf_u1", x); } float child_asinf_u1(float x) { child_f_f("asinf_u1", x); } float child_acosf_u1(float x) { child_f_f("acosf_u1", x); } float child_atanf_u1(float x) { child_f_f("atanf_u1", x); } float child_logf_u1(float x) { child_f_f("logf_u1", x); } float child_expf_u1(float x) { child_f_f("expf_u1", x); } float child_cbrtf_u1(float x) { child_f_f("cbrtf_u1", x); } float child_atan2f_u1(float y, float x) { child_f_f_f("atan2f_u1", y, x); } Sleef_float2 child_sincosf_u1(float x) { child_f2_f("sincosf_u1", x); } float child_powf(float x, float y) { child_f_f_f("powf", x, y); } float child_sqrtf(float x) { child_f_f("sqrtf", x); } float child_sqrtf_u05(float x) { child_f_f("sqrtf_u05", x); } float child_sqrtf_u35(float x) { child_f_f("sqrtf_u35", x); } float child_sinhf(float x) { child_f_f("sinhf", x); } float child_coshf(float x) { child_f_f("coshf", x); } float child_tanhf(float x) { child_f_f("tanhf", x); } float child_sinhf_u35(float x) { child_f_f("sinhf_u35", x); } float child_coshf_u35(float x) { child_f_f("coshf_u35", x); } float child_tanhf_u35(float x) { child_f_f("tanhf_u35", x); } float child_asinhf(float x) { child_f_f("asinhf", x); } float child_acoshf(float x) { child_f_f("acoshf", 
x); } float child_atanhf(float x) { child_f_f("atanhf", x); } float child_log10f(float x) { child_f_f("log10f", x); } float child_log2f(float x) { child_f_f("log2f", x); } float child_log1pf(float x) { child_f_f("log1pf", x); } float child_exp2f(float x) { child_f_f("exp2f", x); } float child_exp10f(float x) { child_f_f("exp10f", x); } float child_expm1f(float x) { child_f_f("expm1f", x); } Sleef_float2 child_sincospif_u05(float x) { child_f2_f("sincospif_u05", x); } Sleef_float2 child_sincospif_u35(float x) { child_f2_f("sincospif_u35", x); } float child_sinpif_u05(float x) { child_f_f("sinpif_u05", x); } float child_cospif_u05(float x) { child_f_f("cospif_u05", x); } float child_hypotf_u05(float x, float y) { child_f_f_f("hypotf_u05", x, y); } float child_hypotf_u35(float x, float y) { child_f_f_f("hypotf_u35", x, y); } float child_copysignf(float x, float y) { child_f_f_f("copysignf", x, y); } float child_fmaxf(float x, float y) { child_f_f_f("fmaxf", x, y); } float child_fminf(float x, float y) { child_f_f_f("fminf", x, y); } float child_fdimf(float x, float y) { child_f_f_f("fdimf", x, y); } float child_nextafterf(float x, float y) { child_f_f_f("nextafterf", x, y); } float child_fmodf(float x, float y) { child_f_f_f("fmodf", x, y); } float child_fabsf(float x) { child_f_f("fabsf", x); } float child_truncf(float x) { child_f_f("truncf", x); } float child_floorf(float x) { child_f_f("floorf", x); } float child_ceilf(float x) { child_f_f("ceilf", x); } float child_roundf(float x) { child_f_f("roundf", x); } float child_rintf(float x) { child_f_f("rintf", x); } float child_frfrexpf(float x) { child_f_f("frfrexpf", x); } Sleef_float2 child_modff(float x) { child_f2_f("modff", x); } float child_tgammaf_u1(float x) { child_f_f("tgammaf_u1", x); } float child_lgammaf_u1(float x) { child_f_f("lgammaf_u1", x); } float child_erff_u1(float x) { child_f_f("erff_u1", x); } float child_erfcf_u15(float x) { child_f_f("erfcf_u15", x); } float child_ldexpf(float x, int q) { 
char str[256]; uint32_t u; sprintf(str, "ldexpf %x %x\n", f2u(x), f2u(q)); write(ptoc[1], str, strlen(str)); if (readln(ctop[0], str, 255) < 1) stop("child_powf"); sscanf(str, "%x", &u); return u2f(u); } int child_ilogbf(float x) { char str[256]; int i; sprintf(str, "ilogbf %x\n", f2u(x)); write(ptoc[1], str, strlen(str)); if (readln(ctop[0], str, 255) < 1) stop("child_ilogbf"); sscanf(str, "%d", &i); return i; } // int allTestsPassed = 1; void showResult(int success) { if (!success) allTestsPassed = 0; fprintf(stderr, "%s\n", success ? "OK" : "NG **************"); if (!success) { fprintf(stderr, "\n\n*** Test failed\n"); exit(-1); } } int enableDP = 0, enableSP = 0; void do_test() { mpfr_t frc, frt, frx, fry, frz; mpfr_inits(frc, frt, frx, fry, frz, NULL); int i, j; int64_t i64; double d, x, y; int success = 1; if (enableDP) { fprintf(stderr, "Denormal/nonnumber test atan2(y, x)\n\n"); fprintf(stderr, "If y is +0 and x is -0, +pi is returned : "); showResult(child_atan2(+0.0, -0.0) == M_PI); fprintf(stderr, "If y is -0 and x is -0, -pi is returned : "); showResult(child_atan2(-0.0, -0.0) == -M_PI); fprintf(stderr, "If y is +0 and x is +0, +0 is returned : "); showResult(isPlusZero(child_atan2(+0.0, +0.0))); fprintf(stderr, "If y is -0 and x is +0, -0 is returned : "); showResult(isMinusZero(child_atan2(-0.0, +0.0))); fprintf(stderr, "If y is positive infinity and x is negative infinity, +3*pi/4 is returned : "); showResult(child_atan2(POSITIVE_INFINITY, NEGATIVE_INFINITY) == 3*M_PI/4); fprintf(stderr, "If y is negative infinity and x is negative infinity, -3*pi/4 is returned : "); showResult(child_atan2(NEGATIVE_INFINITY, NEGATIVE_INFINITY) == -3*M_PI/4); fprintf(stderr, "If y is positive infinity and x is positive infinity, +pi/4 is returned : "); showResult(child_atan2(POSITIVE_INFINITY, POSITIVE_INFINITY) == M_PI/4); fprintf(stderr, "If y is negative infinity and x is positive infinity, -pi/4 is returned : "); showResult(child_atan2(NEGATIVE_INFINITY, 
POSITIVE_INFINITY) == -M_PI/4); { fprintf(stderr, "If y is +0 and x is less than 0, +pi is returned : "); double ya[] = { +0.0 }; double xa[] = { -100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5 }; for(i=0;i bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, childFunc(argx), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyNR_d(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, argx, GMP_RNDN); \ mpfrFunc(frc, frx); \ if (countULPdp(childFunc(argx), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, childFunc(argx), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracy_d_d(mpfrFunc, childFunc, argx, argy, bound) do { \ mpfr_set_d(frx, argx, GMP_RNDN); \ mpfr_set_d(fry, argy, GMP_RNDN); \ mpfrFunc(frc, frx, fry, GMP_RNDN); \ if (countULPdp(childFunc(argx, argy), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \ argx, argy, childFunc(argx, argy), mpfr_get_d(frc, GMP_RNDN), countULPdp(childFunc(argx, argy), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyX_d(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, argx, GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ Sleef_double2 d2 = childFunc(argx); \ if (countULPdp(d2.x, frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, d2.x, mpfr_get_d(frc, GMP_RNDN), countULPdp(d2.x, frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyY_d(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, argx, GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ Sleef_double2 d2 = childFunc(argx); \ if (countULPdp(d2.y, frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", argx, d2.y, mpfr_get_d(frc, GMP_RNDN), 
countULPdp(d2.y, frc)); \ success = 0; \ break; \ } \ } while(0) // fprintf(stderr, "\nAccuracy test\n"); // if (enableDP) { fprintf(stderr, "hypot_u35 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_hypot, child_hypot_u35, y, x, 3.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_hypot, child_hypot_u35, y, x, 3.5); } showResult(success); // fprintf(stderr, "hypot_u05 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_hypot, child_hypot_u05, y, x, 0.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_hypot, child_hypot_u05, y, x, 0.5); } showResult(success); // fprintf(stderr, "copysign : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_copysign, child_copysign, y, x, 0); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_copysign, child_copysign, y, x, 0); } showResult(success); // fprintf(stderr, "fmax : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_max, child_fmax, y, x, 0); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_max, child_fmax, y, x, 0); } showResult(success); // fprintf(stderr, "fmin : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_min, child_fmin, y, x, 0); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_min, child_fmin, y, x, 0); } showResult(success); // fprintf(stderr, "fdim : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && 
success;x += 0.15) checkAccuracy_d_d(mpfr_dim, child_fdim, y, x, 0.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_dim, child_fdim, y, x, 0.5); } showResult(success); // fprintf(stderr, "fmod : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_fmod, child_fmod, y, x, 0.5); } for(y = -1e+10;y < 1e+10 && success;y += 1.51e+8) { for(x = -1e+10;x < 1e+10 && success;x += 1.51e+8) checkAccuracy_d_d(mpfr_fmod, child_fmod, y, x, 0.5); } showResult(success); // fprintf(stderr, "trunc : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_trunc, child_trunc, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_d(mpfr_trunc, child_trunc, d, 0); { double start = u2d(d2u((double)(1LL << 52))-20), end = u2d(d2u((double)(1LL << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_trunc, child_trunc, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_trunc, child_trunc, -d, 0); } showResult(success); // fprintf(stderr, "floor : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_floor, child_floor, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_d(mpfr_floor, child_floor, d, 0); { double start = u2d(d2u((double)(1LL << 52))-20), end = u2d(d2u((double)(1LL << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_floor, child_floor, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_floor, child_floor, -d, 0); } showResult(success); // fprintf(stderr, "ceil : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_ceil, child_ceil, d, 0); } for(d = -10000;d < 10000 && success;d += 
2.5) checkAccuracyNR_d(mpfr_ceil, child_ceil, d, 0); { double start = u2d(d2u((double)(1LL << 52))-20), end = u2d(d2u((double)(1LL << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_ceil, child_ceil, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_ceil, child_ceil, -d, 0); } showResult(success); // fprintf(stderr, "round : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_round, child_round, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_d(mpfr_round, child_round, d, 0); { double start = u2d(d2u((double)(1LL << 52))-20), end = u2d(d2u((double)(1LL << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_round, child_round, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyNR_d(mpfr_round, child_round, -d, 0); } showResult(success); // fprintf(stderr, "rint : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_rint, child_rint, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracy_d(mpfr_rint, child_rint, d, 0); { double start = u2d(d2u((double)(1LL << 52))-20), end = u2d(d2u((double)(1LL << 52))+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_rint, child_rint, d, 0); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_rint, child_rint, -d, 0); } showResult(success); // fprintf(stderr, "sin : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sin, child_sin, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_sin, child_sin, d, 3.5); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_sin, child_sin, pow(2.16, i), 3.5); for(i64=(int64_t)-1e+14;i64<(int64_t)1e+14 && success;i64+=(int64_t)1e+12) { double start = u2d(d2u(M_PI_4 * i64)-20), end = u2d(d2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = 
u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sin, child_sin, d, 3.5); } showResult(success); // fprintf(stderr, "sin_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sin, child_sin_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_sin, child_sin_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_sin, child_sin_u1, pow(2.16, i), 1.0); for(i64=(int64_t)-1e+14;i64<(int64_t)1e+14 && success;i64+=(int64_t)1e+12) { double start = u2d(d2u(M_PI_4 * i64)-20), end = u2d(d2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sin, child_sin_u1, d, 1.0); } showResult(success); // fprintf(stderr, "sin in sincos : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyX_d(mpfr_sin, child_sincos, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracyX_d(mpfr_sin, child_sincos, d, 3.5); for(i = 0;i < 920 && success;i++) checkAccuracyX_d(mpfr_sin, child_sincos, pow(2.16, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sin, child_sincos, d, 3.5); } showResult(success); // fprintf(stderr, "sin in sincos_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyX_d(mpfr_sin, child_sincos_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracyX_d(mpfr_sin, child_sincos_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracyX_d(mpfr_sin, child_sincos_u1, pow(2.16, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sin, child_sincos_u1, d, 1.0); } showResult(success); // mpfr_set_default_prec(1280); fprintf(stderr, "sin in sincospi_u35 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u35, d, 3.5); for(d = -1e+8-0.1;d < 
1e+8 && success;d += (1e+10 + 0.1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u35, d, 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u35, d, 3.5); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u35, d, 3.5); } showResult(success); // fprintf(stderr, "sin in sincospi_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u05, d, 0.506); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyX_d(mpfr_sinpi, child_sincospi_u05, d, 0.506); } showResult(success); // fprintf(stderr, "sinpi_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506); } showResult(success); // fprintf(stderr, "cospi_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_d(mpfr_cospi, child_cospi_u05, d, 0.506); for(d = 
-1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_cospi, child_cospi_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_cospi, child_cospi_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_cospi, child_cospi_u05, d, 0.506); } showResult(success); mpfr_set_default_prec(128); // fprintf(stderr, "cos : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cos, child_cos, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_cos, child_cos, d, 3.5); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_cos, child_cos, pow(2.16, i), 3.5); for(i64=(int64_t)-1e+14;i64<(int64_t)1e+14 && success;i64+=(int64_t)1e+12) { double start = u2d(d2u(M_PI_4 * i64)-20), end = u2d(d2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_cos, child_cos, d, 3.5); } showResult(success); // fprintf(stderr, "cos_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cos, child_cos_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_cos, child_cos_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_cos, child_cos_u1, pow(2.16, i), 1.0); for(i64=(int64_t)-1e+14;i64<(int64_t)1e+14 && success;i64+=(int64_t)1e+12) { double start = u2d(d2u(M_PI_4 * i64)-20), end = u2d(d2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_cos, child_cos_u1, d, 1.0); } showResult(success); // fprintf(stderr, "cos in sincos : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyY_d(mpfr_cos, child_sincos, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracyY_d(mpfr_cos, child_sincos, d, 3.5); for(i = 0;i < 920 && success;i++) 
checkAccuracyY_d(mpfr_cos, child_sincos, pow(2.16, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cos, child_sincos, d, 3.5); } showResult(success); // fprintf(stderr, "cos in sincos_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyY_d(mpfr_cos, child_sincos_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracyY_d(mpfr_cos, child_sincos_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracyY_d(mpfr_cos, child_sincos_u1, pow(2.16, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cos, child_sincos_u1, d, 1.0); } showResult(success); // mpfr_set_default_prec(1280); fprintf(stderr, "cos in sincospi_u35 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyY_d(mpfr_cospi, child_sincospi_u35, d, 3.5); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u35, d, 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u35, d, 3.5); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u35, d, 3.5); } showResult(success); // fprintf(stderr, "cos in sincospi_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyY_d(mpfr_cospi, child_sincospi_u05, d, 0.506); for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cospi, 
child_sincospi_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2d(d2u(0.25 * i)-20), end = u2d(d2u(0.25 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracyY_d(mpfr_cospi, child_sincospi_u05, d, 0.506); } showResult(success); mpfr_set_default_prec(128); // fprintf(stderr, "tan : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_tan, child_tan, d, 3.5); for(d = -1e+7;d < 1e+7 && success;d += 1000.1) checkAccuracy_d(mpfr_tan, child_tan, d, 3.5); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_tan, child_tan, d, 3.5); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_tan, child_tan, pow(2.16, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_tan, child_tan, d, 3.5); } showResult(success); // fprintf(stderr, "tan_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_tan, child_tan_u1, d, 1.0); for(d = -1e+7;d < 1e+7 && success;d += 1000.1) checkAccuracy_d(mpfr_tan, child_tan_u1, d, 1.0); for(d = -1e+14;d < 1e+14 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_tan, child_tan_u1, d, 1.0); for(i = 0;i < 920 && success;i++) checkAccuracy_d(mpfr_tan, child_tan_u1, pow(2.16, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2d(d2u(M_PI_4 * i)-20), end = u2d(d2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_tan, child_tan_u1, d, 1.0); } showResult(success); // fprintf(stderr, "log : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log, child_log, d, 3.5); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_d(mpfr_log, child_log, d, 3.5); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log, pow(2.1, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log, DBL_MAX * pow(0.9314821319758632, i), 3.5); for(i=0;i<10000 && 
success;i+=10) checkAccuracy_d(mpfr_log, child_log, pow(0.933254300796991, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log, DBL_MIN * pow(0.996323, i), 3.5); showResult(success); // fprintf(stderr, "log_u1 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log, child_log_u1, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_d(mpfr_log, child_log_u1, d, 1.0); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log_u1, pow(2.1, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log_u1, DBL_MAX * pow(0.9314821319758632, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log_u1, pow(0.933254300796991, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_d(mpfr_log, child_log_u1, DBL_MIN * pow(0.996323, i), 1.0); showResult(success); // fprintf(stderr, "exp : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp, child_exp, d, 1.0); for(d = -1000;d < 1000 && success;d += 1.1) checkAccuracy_d(mpfr_exp, child_exp, d, 1.0); showResult(success); // fprintf(stderr, "pow : "); for(y = 0.1;y < 100 && success;y += 0.6) { for(x = -100;x < 100 && success;x += 0.6) { checkAccuracy_d_d(mpfr_pow, child_pow, x, y, 1.0); } } for(y = -1000;y < 1000 && success;y += 0.1) checkAccuracy_d_d(mpfr_pow, child_pow, 2.1, y, 1.0); showResult(success); // fprintf(stderr, "sqrt : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_sqrt, child_sqrt, d, 1.0); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt, pow(2.1, d), 1.0); showResult(success); // fprintf(stderr, "sqrt_u05 : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_sqrt, child_sqrt_u05, d, 0.506); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt_u05, pow(2.1, d), 0.506); showResult(success); // fprintf(stderr, "sqrt_u35 : "); for(d = -10000;d < 10000 && success;d += 2.1) 
checkAccuracy_d(mpfr_sqrt, child_sqrt_u35, d, 3.5); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_sqrt, child_sqrt_u35, pow(2.1, d), 3.5); showResult(success); // fprintf(stderr, "cbrt : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_cbrt, child_cbrt, d, 3.5); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_cbrt, child_cbrt, pow(2.1, d), 3.5); showResult(success); // fprintf(stderr, "cbrt_u1 : "); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_cbrt, child_cbrt_u1, d, 1.0); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_d(mpfr_cbrt, child_cbrt_u1, pow(2.1, d), 1.0); showResult(success); // fprintf(stderr, "asin : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_d(mpfr_asin, child_asin, d, 3.5); showResult(success); // fprintf(stderr, "asin_u1 : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_d(mpfr_asin, child_asin_u1, d, 1.0); showResult(success); // fprintf(stderr, "acos : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_d(mpfr_acos, child_acos, d, 3.5); showResult(success); // fprintf(stderr, "acos_u1 : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_d(mpfr_acos, child_acos_u1, d, 1.0); showResult(success); // fprintf(stderr, "atan : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_atan, child_atan, d, 3.5); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_atan, child_atan, d, 3.5); showResult(success); // fprintf(stderr, "atan_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_atan, child_atan_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_d(mpfr_atan, child_atan_u1, d, 1.0); showResult(success); // fprintf(stderr, "atan2 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_atan2, child_atan2, y, x, 3.5); } for(y = -100;y < 100 && success;y += 1.51) { for(x = -100;x < 100 && success;x += 1.51) 
checkAccuracy_d_d(mpfr_atan2, child_atan2, y, x, 3.5); } showResult(success); // fprintf(stderr, "atan2_u1 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_d_d(mpfr_atan2, child_atan2_u1, y, x, 1.0); } for(y = -100;y < 100 && success;y += 1.51) { for(x = -100;x < 100 && success;x += 1.51) checkAccuracy_d_d(mpfr_atan2, child_atan2_u1, y, x, 1.0); } showResult(success); // fprintf(stderr, "sinh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sinh, child_sinh, d, 1.0); for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_sinh, child_sinh, d, 1.0); showResult(success); // fprintf(stderr, "cosh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cosh, child_cosh, d, 1.0); for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_cosh, child_cosh, d, 1.0); showResult(success); // fprintf(stderr, "tanh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_tanh, child_tanh, d, 1.0); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_tanh, child_tanh, d, 1.0); showResult(success); // fprintf(stderr, "sinh_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_sinh, child_sinh_u35, d, 3.5); for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_sinh, child_sinh_u35, d, 3.5); showResult(success); // fprintf(stderr, "cosh_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_cosh, child_cosh_u35, d, 3.5); for(d = -709;d < 709 && success;d += 0.2) checkAccuracy_d(mpfr_cosh, child_cosh_u35, d, 3.5); showResult(success); // fprintf(stderr, "tanh_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_tanh, child_tanh_u35, d, 3.5); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_tanh, child_tanh_u35, d, 3.5); showResult(success); // fprintf(stderr, "asinh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_asinh, child_asinh, d, 1.0); for(d = -1000;d < 
1000 && success;d += 0.2) checkAccuracy_d(mpfr_asinh, child_asinh, d, 1.0); showResult(success); // fprintf(stderr, "acosh : "); for(d = 1;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_acosh, child_acosh, d, 1.0); for(d = 1;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_acosh, child_acosh, d, 1.0); showResult(success); // fprintf(stderr, "atanh : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_atanh, child_atanh, d, 1.0); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_atanh, child_atanh, d, 1.0); showResult(success); // fprintf(stderr, "exp2 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp2, child_exp2, d, 1.0); for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_d(mpfr_exp2, child_exp2, d, 1.0); showResult(success); // fprintf(stderr, "exp10 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_exp10, child_exp10, d, 1.0); for(d = -300;d < 300 && success;d += 0.1) checkAccuracy_d(mpfr_exp10, child_exp10, d, 1.0); showResult(success); // fprintf(stderr, "expm1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_expm1, child_expm1, d, 1.0); for(d = -1000;d < 1000 && success;d += 0.21) checkAccuracy_d(mpfr_expm1, child_expm1, d, 1.0); for(d = 0;d < 300 && success;d += 0.21) checkAccuracy_d(mpfr_expm1, child_expm1, pow(10, -d), 1.0); for(d = 0;d < 300 && success;d += 0.21) checkAccuracy_d(mpfr_expm1, child_expm1, (-pow(10, -d)), 1.0); showResult(success); // fprintf(stderr, "log10 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log10, child_log10, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_d(mpfr_log10, child_log10, d, 1.0); for(i=0;i<10000 && success;i++) checkAccuracy_d(mpfr_log10, child_log10, (DBL_MIN * pow(0.996323, i)), 1.0); showResult(success); // fprintf(stderr, "log2 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log2, child_log2, d, 1.0); for(d = 0.0001;d < 10000 && success;d 
+= 1.1) checkAccuracy_d(mpfr_log2, child_log2, d, 1.0); for(i=0;i<10000 && success;i++) checkAccuracy_d(mpfr_log2, child_log2, (DBL_MIN * pow(0.996323, i)), 1.0); showResult(success); // fprintf(stderr, "log1p : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log1p, child_log1p, d, 1.0); showResult(success); // fprintf(stderr, "lgamma_u1 : "); for(d = -5000;d < 5000 && success;d += 1.1) checkAccuracy_d(mpfr_lgamma_nosign, child_lgamma_u1, d, 1.0); showResult(success); // fprintf(stderr, "tgamma_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_gamma, child_tgamma_u1, d, 1.0); showResult(success); // fprintf(stderr, "erf_u1 : "); for(d = -100;d < 100 && success;d += 0.02) checkAccuracy_d(mpfr_erf, child_erf_u1, d, 1.0); showResult(success); // fprintf(stderr, "erfc_u15 : "); for(d = -1;d < 100 && success;d += 0.01) checkAccuracy_d(mpfr_erfc, child_erfc_u15, d, 1.5); showResult(success); // { fprintf(stderr, "ilogb : "); for(d = 0.0001;d < 10;d += 0.001) { int q = child_ilogb(d); int c = ilogb(d); if (q != c) { fprintf(stderr, "ilogb : arg = %.20g, test = %d, correct = %d\n", d, ilogb(d), child_ilogb(d)); success = 0; showResult(success); } } for(d = 0.0001;d < 10000;d += 1.1) { int q = child_ilogb(d); int c = ilogb(d); if (q != c) { fprintf(stderr, "ilogb : arg = %.20g, test = %d, correct = %d\n", d, ilogb(d), child_ilogb(d)); success = 0; showResult(success); } } for(i=0;i<10000;i+=10) { d = DBL_MIN * pow(0.996323, i); if (d == 0) continue; int q = child_ilogb(d); int c = ilogb(d); if (q != c) { fprintf(stderr, "ilogb : arg = %.20g, test = %d, correct = %d\n", d, ilogb(d), child_ilogb(d)); success = 0; showResult(success); } } for(i=0;i<10000;i+=10) { d = pow(0.933254300796991, i); if (d == 0) continue; int q = child_ilogb(d); int c = ilogb(d); if (q != c) { fprintf(stderr, "ilogb : arg = %.20g, test = %d, correct = %d\n", d, ilogb(d), child_ilogb(d)); success = 0; showResult(success); } } showResult(success); } } // 
#define checkAccuracy_f(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ if (countULPsp(childFunc((float)flushToZero(argx)), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \ (float)flushToZero(argx), (double)childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx)), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyNR_f(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx); \ if (countULPsp(childFunc((float)flushToZero(argx)), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \ (float)flushToZero(argx), (double)childFunc((float)flushToZero(argx)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx)), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracy_f_f(mpfrFunc, childFunc, argx, argy, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfr_set_d(fry, (float)flushToZero(argy), GMP_RNDN); \ mpfrFunc(frc, frx, fry, GMP_RNDN); \ if (countULPsp(childFunc((float)flushToZero(argx), (float)flushToZero(argy)), frc) > bound) { \ fprintf(stderr, "\narg = %.20g, %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", \ (float)flushToZero(argx), (float)flushToZero(argy), childFunc((float)flushToZero(argx), (float)flushToZero(argy)), mpfr_get_d(frc, GMP_RNDN), countULPsp(childFunc((float)flushToZero(argx), (float)flushToZero(argy)), frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyX_f(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \ if (countULPsp(d2.x, frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", (float)flushToZero(argx), 
(double)d2.x, mpfr_get_d(frc, GMP_RNDN), countULPsp(d2.x, frc)); \ success = 0; \ break; \ } \ } while(0) #define checkAccuracyY_f(mpfrFunc, childFunc, argx, bound) do { \ mpfr_set_d(frx, (float)flushToZero(argx), GMP_RNDN); \ mpfrFunc(frc, frx, GMP_RNDN); \ Sleef_float2 d2 = childFunc((float)flushToZero(argx)); \ if (countULPsp(d2.y, frc) > bound) { \ fprintf(stderr, "\narg = %.20g, test = %.20g, correct = %.20g, ULP = %lf\n", (float)flushToZero(argx), (double)d2.y, mpfr_get_d(frc, GMP_RNDN), countULPsp(d2.y, frc)); \ success = 0; \ break; \ } \ } while(0) // if (enableSP) { fprintf(stderr, "hypotf_u35 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_hypot, child_hypotf_u35, y, x, 3.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_hypot, child_hypotf_u35, y, x, 3.5); } showResult(success); // fprintf(stderr, "hypotf_u05 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_hypot, child_hypotf_u05, y, x, 0.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_hypot, child_hypotf_u05, y, x, 0.5); } showResult(success); // fprintf(stderr, "copysignf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_copysign, child_copysignf, y, x, 0); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_copysign, child_copysignf, y, x, 0); } showResult(success); // fprintf(stderr, "fmaxf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_max, child_fmaxf, y, x, 0); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_max, child_fmaxf, y, x, 0); } 
showResult(success); // fprintf(stderr, "fminf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_min, child_fminf, y, x, 0); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_min, child_fminf, y, x, 0); } showResult(success); // fprintf(stderr, "fdimf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_dim, child_fdimf, y, x, 0.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_dim, child_fdimf, y, x, 0.5); } showResult(success); // fprintf(stderr, "fmodf : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_fmod, child_fmodf, y, x, 0.5); } for(y = -1e+7;y < 1e+7 && success;y += 1.51e+5) { for(x = -1e+7;x < 1e+7 && success;x += 1.51e+5) checkAccuracy_f_f(mpfr_fmod, child_fmodf, y, x, 0.5); } showResult(success); // fprintf(stderr, "truncf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_f(mpfr_trunc, child_truncf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_f(mpfr_trunc, child_truncf, d, 0); { double start = u2f(f2u((double)(1LL << 23))-20), end = u2f(f2u((double)(1LL << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_trunc, child_truncf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_trunc, child_truncf, -d, 0); } showResult(success); // fprintf(stderr, "floorf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_f(mpfr_floor, child_floorf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_f(mpfr_floor, child_floorf, d, 0); { double start = u2f(f2u((double)(1LL << 23))-20), end = 
u2f(f2u((double)(1LL << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_floor, child_floorf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_floor, child_floorf, -d, 0); } showResult(success); // fprintf(stderr, "ceilf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_f(mpfr_ceil, child_ceilf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_f(mpfr_ceil, child_ceilf, d, 0); { double start = u2f(f2u((double)(1LL << 23))-20), end = u2f(f2u((double)(1LL << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_ceil, child_ceilf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_ceil, child_ceilf, -d, 0); } showResult(success); // fprintf(stderr, "roundf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracyNR_f(mpfr_round, child_roundf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracyNR_f(mpfr_round, child_roundf, d, 0); { double start = u2f(f2u((double)(1LL << 23))-20), end = u2f(f2u((double)(1LL << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_round, child_roundf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyNR_f(mpfr_round, child_roundf, -d, 0); } showResult(success); // fprintf(stderr, "rintf : "); for(x = -100.5;x <= 100.5;x+=0.5) { for(d = u2d(d2u(x)-3);d <= u2d(d2u(x)+3) && success;d = u2d(d2u(d)+1)) checkAccuracy_f(mpfr_rint, child_rintf, d, 0); } for(d = -10000;d < 10000 && success;d += 2.5) checkAccuracy_f(mpfr_rint, child_rintf, d, 0); { double start = u2f(f2u((double)(1LL << 23))-20), end = u2f(f2u((double)(1LL << 23))+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_rint, child_rintf, d, 0); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_rint, child_rintf, -d, 0); } showResult(success); // 
fprintf(stderr, "sinf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sin, child_sinf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_sin, child_sinf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_sin, child_sinf, pow(1.092, i), 3.5); for(i64=(int64_t)-1000;i64<(int64_t)1000 && success;i64+=(int64_t)1) { double start = u2f(f2u(M_PI_4 * i64)-20), end = u2f(f2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_sin, child_sinf, d, 3.5); } showResult(success); // fprintf(stderr, "sinf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sin, child_sinf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_sin, child_sinf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_sin, child_sinf_u1, pow(1.092, i), 1.0); for(i64=(int64_t)-1000;i64<(int64_t)1000 && success;i64+=(int64_t)1) { double start = u2f(f2u(M_PI_4 * i64)-20), end = u2f(f2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_sin, child_sinf_u1, d, 1.0); } showResult(success); // fprintf(stderr, "sin in sincosf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyX_f(mpfr_sin, child_sincosf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracyX_f(mpfr_sin, child_sincosf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracyX_f(mpfr_sin, child_sincosf, pow(1.092, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sin, child_sincosf, d, 3.5); } showResult(success); // fprintf(stderr, "sin in sincosf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyX_f(mpfr_sin, child_sincosf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracyX_f(mpfr_sin, child_sincosf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracyX_f(mpfr_sin, 
child_sincosf_u1, pow(1.092, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sin, child_sincosf_u1, d, 1.0); } showResult(success); // mpfr_set_default_prec(1280); fprintf(stderr, "sin in sincospif_u35 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u35, d, 3.5); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u35, d, 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u35, d, 3.5); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u35, d, 3.5); } showResult(success); // fprintf(stderr, "sin in sincospif_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u05, d, 0.506); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyX_f(mpfr_sinpi, child_sincospif_u05, d, 0.506); } showResult(success); // fprintf(stderr, "sinpif_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_f(mpfr_sinpi, child_sinpif_u05, d, 0.506); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_sinpi, child_sinpif_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = 
start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_sinpi, child_sinpif_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_sinpi, child_sinpif_u05, d, 0.506); } showResult(success); // fprintf(stderr, "cospif_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_f(mpfr_cospi, child_cospif_u05, d, 0.506); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_cospi, child_cospif_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_cospi, child_cospif_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_cospi, child_cospif_u05, d, 0.506); } showResult(success); mpfr_set_default_prec(128); // fprintf(stderr, "cosf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cos, child_cosf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_cos, child_cosf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_cos, child_cosf, pow(1.092, i), 3.5); for(i64=(int64_t)-1000;i64<(int64_t)1000 && success;i64+=(int64_t)1) { double start = u2f(f2u(M_PI_4 * i64)-20), end = u2f(f2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_cos, child_cosf, d, 3.5); } showResult(success); // fprintf(stderr, "cosf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cos, child_cosf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_cos, child_cosf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_cos, child_cosf_u1, pow(1.092, i), 1.0); for(i64=(int64_t)-1000;i64<(int64_t)1000 && success;i64+=(int64_t)1) { double start = u2f(f2u(M_PI_4 * i64)-20), end 
= u2f(f2u(M_PI_4 * i64)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_cos, child_cosf_u1, d, 1.0); } showResult(success); // fprintf(stderr, "cos in sincosf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyY_f(mpfr_cos, child_sincosf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracyY_f(mpfr_cos, child_sincosf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracyY_f(mpfr_cos, child_sincosf, pow(1.092, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cos, child_sincosf, d, 3.5); } showResult(success); // fprintf(stderr, "cos in sincosf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracyY_f(mpfr_cos, child_sincosf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracyY_f(mpfr_cos, child_sincosf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracyY_f(mpfr_cos, child_sincosf_u1, pow(1.092, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cos, child_sincosf_u1, d, 1.0); } showResult(success); // mpfr_set_default_prec(1280); fprintf(stderr, "cos in sincospif_u35 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyY_f(mpfr_cospi, child_sincospif_u35, d, 3.5); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracyY_f(mpfr_cospi, child_sincospif_u35, d, 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cospi, child_sincospif_u35, d, 3.5); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cospi, child_sincospif_u35, d, 3.5); } showResult(success); // fprintf(stderr, "cos in 
sincospif_u05 : "); for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracyY_f(mpfr_cospi, child_sincospif_u05, d, 0.506); for(d = -10000-0.1;d < 10000 && success;d += 1.1) checkAccuracyY_f(mpfr_cospi, child_sincospif_u05, d, 0.506); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(i)-20), end = u2f(f2u(i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cospi, child_sincospif_u05, d, 0.506); } for(i=1;i<=20 && success;i++) { double start = u2f(f2u(0.25 * i)-20), end = u2f(f2u(0.25 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracyY_f(mpfr_cospi, child_sincospif_u05, d, 0.506); } showResult(success); mpfr_set_default_prec(128); // fprintf(stderr, "tanf : "); checkAccuracy_f(mpfr_tan, child_tanf, 70.936981201171875, 3.5); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_tan, child_tanf, d, 3.5); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_tan, child_tanf, d, 3.5); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_tan, child_tanf, pow(1.092, i), 3.5); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_tan, child_tanf, d, 3.5); } showResult(success); // fprintf(stderr, "tanf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_tan, child_tanf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_tan, child_tanf_u1, d, 1.0); for(i = 0;i < 1000 && success;i++) checkAccuracy_f(mpfr_tan, child_tanf_u1, pow(1.092, i), 1.0); for(i=1;i<10000 && success;i+=31) { double start = u2f(f2u(M_PI_4 * i)-20), end = u2f(f2u(M_PI_4 * i)+20); for(d = start;d <= end;d = u2f(f2u(d)+1)) checkAccuracy_f(mpfr_tan, child_tanf_u1, d, 1.0); } showResult(success); // fprintf(stderr, "logf : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log, child_logf, d, 3.5); for(d = 0.0001;d < 10000 && success;d += 1.1) 
checkAccuracy_f(mpfr_log, child_logf, d, 3.5); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf, pow(2.1, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf, FLT_MAX * pow(0.9314821319758632, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf, pow(0.933254300796991, i), 3.5); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf, FLT_MIN * pow(0.996323, i), 3.5); showResult(success); // fprintf(stderr, "logf_u1 : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log, child_logf_u1, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log, child_logf_u1, d, 1.0); if (!enableFlushToZero) { for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf_u1, FLT_MAX * pow(0.9314821319758632, i), 1.0); for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf_u1, pow(2.1, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf_u1, pow(0.933254300796991, i), 1.0); for(i=0;i<10000 && success;i+=10) checkAccuracy_f(mpfr_log, child_logf_u1, FLT_MIN * pow(0.996323, i), 1.0); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log, child_logf_u1, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log, child_logf_u1, d, 1.0); } showResult(success); // fprintf(stderr, "expf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_exp, child_expf, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 1.1) checkAccuracy_f(mpfr_exp, child_expf, d, 1.0); } showResult(success); // fprintf(stderr, "powf : "); if (!enableFlushToZero) { for(y = 0.1;y < 100 && success;y += 0.6) { for(x = -100;x < 100 && success;x += 0.6) { checkAccuracy_f_f(mpfr_pow, child_powf, x, y, 1.0); } } for(y = -1000;y < 1000 && success;y += 0.1) checkAccuracy_f_f(mpfr_pow, child_powf, 2.1, y, 1.0); } else { for(y = 0.1;y < 10 && success;y += 
0.06) { for(x = -100;x < 10 && success;x += 0.06) { checkAccuracy_f_f(mpfr_pow, child_powf, x, y, 1.0); } } } showResult(success); // fprintf(stderr, "sqrtf : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf, d, 1.0); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, child_sqrtf, pow(2.1, d), 1.0); showResult(success); // fprintf(stderr, "sqrtf_u05 : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u05, d, 0.506); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u05, pow(2.1, d), 0.506); showResult(success); // fprintf(stderr, "sqrtf_u35 : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u35, d, 3.5); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_sqrt, child_sqrtf_u35, pow(2.1, d), 3.5); showResult(success); // fprintf(stderr, "cbrtf : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_cbrt, child_cbrtf, d, 3.5); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_cbrt, child_cbrtf, pow(2.1, d), 3.5); showResult(success); // fprintf(stderr, "cbrtf_u1 : "); if (!enableFlushToZero) { for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_cbrt, child_cbrtf_u1, d, 1.0); } for(i = -1000;i <= 1000 && success;i+=10) checkAccuracy_f(mpfr_cbrt, child_cbrtf_u1, pow(2.1, d), 1.0); showResult(success); // fprintf(stderr, "asinf : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_f(mpfr_asin, child_asinf, d, 3.5); showResult(success); // fprintf(stderr, "asinf_u1 : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_f(mpfr_asin, child_asinf_u1, d, 1.0); showResult(success); // fprintf(stderr, "acosf : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_f(mpfr_acos, child_acosf, d, 3.5); showResult(success); // 
fprintf(stderr, "acosf_u1 : "); for(d = -1;d < 1 && success;d += 0.0002) checkAccuracy_f(mpfr_acos, child_acosf_u1, d, 1.0); showResult(success); // fprintf(stderr, "atanf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5); showResult(success); // fprintf(stderr, "atanf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atan, child_atanf_u1, d, 1.0); for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_atan, child_atanf_u1, d, 1.0); showResult(success); // fprintf(stderr, "atan2f : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_atan2, child_atan2f, y, x, 3.5); } for(y = -100;y < 100 && success;y += 1.51) { for(x = -100;x < 100 && success;x += 1.51) checkAccuracy_f_f(mpfr_atan2, child_atan2f, y, x, 3.5); } showResult(success); // fprintf(stderr, "atan2f_u1 : "); for(y = -10;y < 10 && success;y += 0.15) { for(x = -10;x < 10 && success;x += 0.15) checkAccuracy_f_f(mpfr_atan2, child_atan2f_u1, y, x, 1.0); } for(y = -100;y < 100 && success;y += 1.51) { for(x = -100;x < 100 && success;x += 1.51) checkAccuracy_f_f(mpfr_atan2, child_atan2f_u1, y, x, 1.0); } showResult(success); // fprintf(stderr, "sinhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sinh, child_sinhf, d, 1.0); if (!enableFlushToZero) { for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_sinh, child_sinhf, d, 1.0); } showResult(success); // fprintf(stderr, "coshf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cosh, child_coshf, d, 1.0); if (!enableFlushToZero) { for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_cosh, child_coshf, d, 1.0); } showResult(success); // fprintf(stderr, "tanhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_tanh, child_tanhf, d, 1.0); if (!enableFlushToZero) { 
for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_tanh, child_tanhf, d, 1.0); } showResult(success); // fprintf(stderr, "sinhf_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_sinh, child_sinhf_u35, d, 3.5); if (!enableFlushToZero) { for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_sinh, child_sinhf_u35, d, 3.5); } showResult(success); // fprintf(stderr, "coshf_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_cosh, child_coshf_u35, d, 3.5); if (!enableFlushToZero) { for(d = -88;d < 88 && success;d += 0.2) checkAccuracy_f(mpfr_cosh, child_coshf_u35, d, 3.5); } showResult(success); // fprintf(stderr, "tanhf_u35 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_tanh, child_tanhf_u35, d, 3.5); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_tanh, child_tanhf_u35, d, 3.5); } showResult(success); // fprintf(stderr, "asinhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_asinh, child_asinhf, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_asinh, child_asinhf, d, 1.0); } showResult(success); // fprintf(stderr, "acoshf : "); for(d = 1;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_acosh, child_acoshf, d, 1.0); if (!enableFlushToZero) { for(d = 1;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_acosh, child_acoshf, d, 1.0); } showResult(success); // fprintf(stderr, "atanhf : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atanh, child_atanhf, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_atanh, child_atanhf, d, 1.0); } showResult(success); // fprintf(stderr, "exp2f : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_exp2, child_exp2f, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.2) checkAccuracy_f(mpfr_exp2, child_exp2f, d, 1.0); } 
showResult(success); // fprintf(stderr, "exp10f : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_exp10, child_exp10f, d, 1.0); if (!enableFlushToZero) { for(d = -300;d < 300 && success;d += 0.1) checkAccuracy_f(mpfr_exp10, child_exp10f, d, 1.0); } showResult(success); // fprintf(stderr, "expm1f : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_expm1, child_expm1f, d, 1.0); if (!enableFlushToZero) { for(d = -1000;d < 1000 && success;d += 0.21) checkAccuracy_f(mpfr_expm1, child_expm1f, d, 1.0); for(d = 0;d < 300 && success;d += 0.21) checkAccuracy_f(mpfr_expm1, child_expm1f, pow(10, -d), 1.0); for(d = 0;d < 300 && success;d += 0.21) checkAccuracy_f(mpfr_expm1, child_expm1f, (-pow(10, -d)), 1.0); } showResult(success); // fprintf(stderr, "log10f : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log10, child_log10f, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log10, child_log10f, d, 1.0); for(i=0;i<10000 && success;i++) checkAccuracy_f(mpfr_log10, child_log10f, (FLT_MIN * pow(0.996323, i)), 1.0); showResult(success); // fprintf(stderr, "log2f : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log2, child_log2f, d, 1.0); for(d = 0.0001;d < 10000 && success;d += 1.1) checkAccuracy_f(mpfr_log2, child_log2f, d, 1.0); for(i=0;i<10000 && success;i++) checkAccuracy_f(mpfr_log2, child_log2f, (FLT_MIN * pow(0.996323, i)), 1.0); showResult(success); // fprintf(stderr, "log1pf : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log1p, child_log1pf, d, 1.0); showResult(success); // fprintf(stderr, "lgammaf_u1 : "); for(d = -5000;d < 5000 && success;d += 1.1) checkAccuracy_f(mpfr_lgamma_nosign, child_lgammaf_u1, d, 1.0); showResult(success); // fprintf(stderr, "tgammaf_u1 : "); for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_gamma, child_tgammaf_u1, d, 1.0); showResult(success); // fprintf(stderr, "erff_u1 : "); for(d = -100;d < 100 && 
success;d += 0.02) checkAccuracy_f(mpfr_erf, child_erff_u1, d, 1.0); showResult(success); // fprintf(stderr, "erfcf_u15 : "); for(d = -1;d < 8 && success;d += 0.001) checkAccuracy_f(mpfr_erfc, child_erfcf_u15, d, 1.5); showResult(success); } } int main(int argc, char **argv) { char *argv2[argc+2], *commandSde = NULL; int i, a2s; // BUGFIX: this flush is to prevent incorrect syncing with the // `iut*` executable that causes failures in the CPU detection on // some CI systems. fflush(stdout); for(a2s=1;a2s #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #include #endif #include "sleef.h" #include "testerutil.h" #define DORENAME #include "rename.h" #define DENORMAL_DBL_MIN (4.9406564584124654418e-324) #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) typedef union { double d; uint64_t u64; int64_t i64; } conv_t; double nexttoward0(double x, int n) { union { double f; uint64_t u; } cx; cx.f = x; cx.u -=n ; return cx.f; } double rnd() { conv_t c; switch(random() & 63) { case 0: return nexttoward0( 0.0, -(random() & ((1 << (random() & 31)) - 1))); case 1: return nexttoward0(-0.0, -(random() & ((1 << (random() & 31)) - 1))); case 2: return nexttoward0( INFINITY, (random() & ((1 << (random() & 31)) - 1))); case 3: return nexttoward0(-INFINITY, (random() & ((1 << (random() & 31)) - 1))); } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif return c.d; } double rnd_fr() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif } 
while(!isnumber(c.d)); return c.d; } double rnd_zo() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif } while(!isnumber(c.d) || c.d < -1 || 1 < c.d); return c.d; } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(1280); mpfr_inits(frw, frx, fry, frz, NULL); conv_t cd; double d, t; double d2, d3, zo; int cnt, ecnt = 0; srandom(time(NULL)); for(cnt = 0;ecnt < 1000;cnt++) { switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 1: cd.d = rint(rnd_zo() * 1e+10) * M_PI_4; cd.i64 += (random() & 0xff) - 0x7f; d = cd.d; d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 2: cd.d = rnd_fr() * M_PI_4; cd.i64 += (random() & 0xf) - 0x7; d = cd.d; d2 = rnd(); d3 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); d3 = rnd_fr(); zo = rnd_zo(); break; } Sleef_double2 sc = xsincospi_u05(d); Sleef_double2 sc2 = xsincospi_u35(d); { const double rangemax2 = 1e+9/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinpi(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = sc.x, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospi_u05 sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = sc2.x, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 1.5) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospi_u35 sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2dp(t = xsinpi_u05(d), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sinpi_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+9/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_cospi(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = sc.y, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.506) || fabs(t) > 1 || 
!isnumber(t))) { printf("Pure C sincospi_u05 cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = sc.y, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 1.5) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospi_u35 cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2dp(t = xcospi_u05(d), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cospi_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } sc = xsincos(d); sc2 = xsincos_u1(d); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xsin(d), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sin arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } double u1 = countULPdp(sc.x, frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincos sin arg=%.20g ulp=%.20g\n", d, u1); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } double u2 = countULPdp(t = xsin_u1(d), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sin_u1 arg=%.20g ulp=%.20g\n", d, u2); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } double u3 = countULPdp(t = sc2.x, frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xcos(d), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = sc.y, frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || 
!isnumber(t))) { printf("Pure C sincos cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULPdp(t = xcos_u1(d), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cos_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULPdp(t = sc2.y, frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xtan(d), frx); if (u0 != 0 && (u0 > 3.5 || isnan(t))) { printf("Pure C tan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xtan_u1(d), frx); if (u1 != 0 && (u1 > 1 || isnan(t))) { printf("Pure C tan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xlog(fabs(d)), frx); if (u0 > 3.5) { printf("Pure C log arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xlog_u1(fabs(d)), frx); if (u1 > 1) { printf("Pure C log_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xlog10(fabs(d)), frx); if (u0 > 1) { printf("Pure C log10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log2(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xlog2(fabs(d)), frx); if (u0 > 1) { printf("Pure C log2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xlog1p(d), frx); if ((-1 <= d && d <= 1e+307 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { printf("Pure C log1p arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, 
GMP_RNDN); double u0 = countULPdp(t = xexp(d), frx); if (u0 > 1) { printf("Pure C exp arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xexp2(d), frx); if (u0 > 1) { printf("Pure C exp2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xexp10(d), frx); if (u0 > 1.09) { printf("Pure C exp10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xexpm1(d), frx); if (u0 > 1) { printf("Pure C expm1 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULPdp(t = xpow(d2, d), frx); if (u0 > 1) { printf("Pure C pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xcbrt(d), frx); if (u0 > 3.5) { printf("Pure C cbrt arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xcbrt_u1(d), frx); if (u1 > 1) { printf("Pure C cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xasin(zo), frx); if (u0 > 3.5) { printf("Pure C asin arg=%.20g ulp=%.20g\n", zo, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xasin_u1(zo), frx); if (u1 > 1) { printf("Pure C asin_u1 arg=%.20g ulp=%.20g\n", zo, u1); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULPdp(t = 
xacos(zo), frx); if (u0 > 3.5) { printf("Pure C acos arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xacos_u1(zo), frx); if (u1 > 1) { printf("Pure C acos_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xatan(d), frx); if (u0 > 3.5) { printf("Pure C atan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = xatan_u1(d), frx); if (u1 > 1) { printf("Pure C atan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULPdp(t = xatan2(d2, d), frx); if (u0 > 3.5) { printf("Pure C atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = xatan2_u1(d2, d), frx); if (u1 > 1) { printf("Pure C atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xsinh(d), frx); if ((fabs(d) <= 709 && u0 > 1) || (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C sinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xcosh(d), frx); if ((fabs(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf("Pure C cosh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xtanh(d), frx); if (u0 > 1) { printf("Pure C tanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xsinh_u35(d), frx); if ((fabs(d) <= 709 && u0 > 3.5) || (d > 709 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || (d < 
-709 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { printf("Pure C sinh_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xcosh_u35(d), frx); if ((fabs(d) <= 709 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { printf("Pure C cosh_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xtanh_u35(d), frx); if (u0 > 3.5) { printf("Pure C tanh_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xasinh(d), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C asinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xacosh(d), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !isnan(t))) { printf("Pure C acosh arg=%.20g ulp=%.20g\n", d, u0); printf("%.20g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xatanh(d), frx); if (u0 > 1) { printf("Pure C atanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } // { mpfr_set_d(frx, d, GMP_RNDN); mpfr_abs(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xfabs(d), frx); if (u0 != 0) { printf("Pure C fabs arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_copysign(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xcopysign(d, d2), frx); if (u0 != 0 && !isnan(d2)) { printf("Pure C 
copysign arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_max(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xfmax(d, d2), frx); if (u0 != 0) { printf("Pure C fmax arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_min(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xfmin(d, d2), frx); if (u0 != 0) { printf("Pure C fmin arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_dim(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xfdim(d, d2), frx); if (u0 > 0.5) { printf("Pure C fdim arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_trunc(frx, frx); double u0 = countULPdp(t = xtrunc(d), frx); if (u0 != 0) { printf("Pure C trunc arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_floor(frx, frx); double u0 = countULPdp(t = xfloor(d), frx); if (u0 != 0) { printf("Pure C floor arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_ceil(frx, frx); double u0 = countULPdp(t = xceil(d), frx); if (u0 != 0) { printf("Pure C ceil arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_round(frx, frx); double u0 = countULPdp(t = 
xround(d), frx); if (u0 != 0) { printf("Pure C round arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_rint(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xrint(d), frx); if (u0 != 0) { printf("Pure C rint arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_set_d(frz, d3, GMP_RNDN); mpfr_fma(frx, frx, fry, frz, GMP_RNDN); double u0 = countULP2dp(t = xfma(d, d2, d3), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+303 < c && c < 1e+303 && u0 > 0.5) || !(u0 <= 0.5 || isinf(t))) { printf("Pure C fma arg=%.20g, %.20g, %.20g ulp=%.20g\n", d, d2, d3, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = xsqrt_u05(d), frx); if (u0 > 0.50001) { printf("Pure C sqrt_u05 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2dp(t = xhypot_u05(d, d2), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 > 0.5) { printf("Pure C hypot arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2dp(t = xhypot_u35(d, d2), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+308 < c && c < 1e+308 && u0 > 3.5) || !(u0 <= 3.5 || isinf(t))) { printf("Pure C hypot arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); 
fflush(stdout); ecnt++; } } { t = xnextafter(d, d2); double c = nextafter(d, d2); if (!(isnan(t) && isnan(c)) && t != c) { printf("Pure C nextafter arg=%.20g, %.20g\n", d, d2); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, 0); double u0 = countULPdp(t = xfrfrexp(d), frx); if (d != 0 && isnumber(d) && u0 != 0) { printf("Pure C frfrexp arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int cexp = mpfr_get_exp(frx); int texp = xexpfrexp(d); if (d != 0 && isnumber(d) && cexp != texp) { printf("Pure C expfrexp arg=%.20g\n", d); printf("correct = %d, test = %d\n", cexp, texp); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_fmod(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = xfmod(d, d2), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabsl((long double)d / d2) < 1e+300 && u0 > 0.5) { printf("Pure C fmod arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { int exp = (random() & 8191) - 4096; mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) + exp); double u0 = countULPdp(t = xldexp(d, exp), frx); if (u0 > 0.5) { printf("Pure C ldexp arg=%.20g %d ulp=%.20g\n", d, exp, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_modf(fry, frz, frx, GMP_RNDN); Sleef_double2 t2 = xmodf(d); double u0 = countULPdp(t2.x, frz); double u1 = countULPdp(t2.y, fry); if (u0 != 0 || u1 != 0) { printf("Pure C modf arg=%.20g ulp=%.20g %.20g\n", d, u0, u1); printf("correct = %.20g, %.20g\n", mpfr_get_d(frz, GMP_RNDN), mpfr_get_d(fry, GMP_RNDN)); printf("test = %.20g, %.20g\n", t2.x, t2.y); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int s; mpfr_lgamma(frx, &s, frx, GMP_RNDN); 
double u0 = countULPdp(t = xlgamma_u1(d), frx); if (((d < 0 && fabsl(t - mpfr_get_ld(frx, GMP_RNDN)) > 1e-15 && u0 > 1) || (0 <= d && d < 2e+305 && u0 > 1) || (2e+305 <= d && !(u0 <= 1 || isinf(t))))) { printf("Pure C xlgamma_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_gamma(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = xtgamma_u1(d), frx); if (u0 > 1.0) { printf("Pure C xtgamma_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erfc(frx, frx, GMP_RNDN); static double ebz = 9.8813129168249308835e-324; // nextafter(nextafter(0, 1), 1); double u0 = countULP2dp(t = xerfc_u15(d), frx); if ((d > 26.2 && u0 > 2.5 && !(mpfr_get_d(frx, GMP_RNDN) == 0 && t <= ebz)) || (d <= 26.2 && u0 > 1.5)) { printf("Pure C xerfc_u15 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erf(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = xerf_u1(d), frx); if (u0 > 1) { printf("Pure C xerf_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } exit(0); } sleef-3.3.1/src/libm-tester/tester2ld.c000066400000000000000000000373451333715643700177360ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include "misc.h" #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #include #endif #include "sleef.h" #define DORENAME #include "rename.h" #define DENORMAL_LDBL_MIN (3.6451995318824746025284059336194e-4951L) #define XLDBL_MIN (3.3621031431120935062626778173218e-4932L) #ifndef M_PIl #define M_PIl 3.141592653589793238462643383279502884L #endif #ifndef M_PI_4l #define M_PI_4l .785398163397448309615660845819875721049292L #endif #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) int isnumberl(long double x) { return x != INFINITYl && x != -INFINITYl && x == x; } int isPlusZerol(long double x) { return x == 0 && copysignl(1, x) == 1; } int isMinusZerol(long double x) { return x == 0 && copysignl(1, x) == -1; } mpfr_t fra, frb, frc, frd; double countULP(long double d, mpfr_t c) { long double c2 = mpfr_get_ld(c, GMP_RNDN); if (c2 == 0 && d != 0) return 10000; //if (isPlusZerol(c2) && !isPlusZerol(d)) return 10003; //if (isMinusZerol(c2) && !isMinusZerol(d)) return 10004; if (isnanl(c2) && isnanl(d)) return 0; if (isnanl(c2) || isnanl(d)) return 10001; if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; if (!isnumberl(c2) && !isnumberl(d)) return 0; int e; frexpl(mpfr_get_ld(c, GMP_RNDN), &e); mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-64), DENORMAL_LDBL_MIN), GMP_RNDN); mpfr_set_ld(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); return u; } double countULP2(long double d, mpfr_t c) { long double c2 = mpfr_get_ld(c, GMP_RNDN); if (c2 
== 0 && d != 0) return 10000; //if (isPlusZerol(c2) && !isPlusZerol(d)) return 10003; //if (isMinusZerol(c2) && !isMinusZerol(d)) return 10004; if (isnanl(c2) && isnanl(d)) return 0; if (isnanl(c2) || isnanl(d)) return 10001; if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; if (!isnumberl(c2) && !isnumberl(d)) return 0; int e; frexpl(mpfr_get_ld(c, GMP_RNDN), &e); mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-64), LDBL_MIN), GMP_RNDN); mpfr_set_ld(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); return u; } typedef union { long double d; __int128 u128; } conv_t; long double rnd() { conv_t c; switch(random() & 15) { case 0: return INFINITY; case 1: return -INFINITY; } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); #else c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); #endif return c.d; } long double rnd_fr() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); #else c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); #endif } while(!isnumberl(c.d)); return c.d; } long double rnd_zo() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); #else c.u128 = random() | ((__int128)random() << 31) | ((__int128)random() << (31*2)) | ((__int128)random() << (31*3)) | ((__int128)random() << (31*4)); #endif } while(!isnumberl(c.d) || c.d < -1 || 1 < c.d); return c.d; } void sinpifr(mpfr_t ret, long double d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_ld(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); 
mpfr_sin(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } void cospifr(mpfr_t ret, long double d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_ld(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_cos(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(256); mpfr_inits(fra, frb, frc, frd, frw, frx, fry, frz, NULL); conv_t cd; long double d, t, d2, zo; int cnt, ecnt = 0; srandom(time(NULL)); #if 0 cd.d = M_PIl; mpfr_set_ld(frx, cd.d, GMP_RNDN); cd.u128 += 3; printf("%g\n", countULP2(cd.d, frx)); #endif const long double rangemax = 1e+9; for(cnt = 0;ecnt < 1000;cnt++) { switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); zo = rnd(); break; case 1: cd.d = rint((2 * (double)random() / RAND_MAX - 1) * 1e+10) * M_PI_4; cd.u128 += (random() & 0xff) - 0x7f; d = cd.d; d2 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); zo = rnd_zo(); break; } Sleef_longdouble2 sc = xsincospil_u05(d); Sleef_longdouble2 sc2 = xsincospil_u35(d); { const double rangemax2 = 1e+9; sinpifr(frx, d); double u0 = countULP2(t = sc.x, frx); if (u0 != 0 && ((fabsl(d) <= rangemax2 && u0 > 0.505) || fabsl(t) > 1 || !isnumberl(t))) { printf("Pure C sincospil_u05 sin arg=%.30Lg ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = sc2.x, frx); if (u1 != 0 && ((fabsl(d) <= rangemax2 && u1 > 1.5) || fabsl(t) > 1 || !isnumberl(t))) { printf("Pure C sincospil_u35 sin arg=%.30Lg ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+9; cospifr(frx, d); double u0 = countULP2(t = sc.y, frx); if (u0 != 0 && ((fabsl(d) <= rangemax2 && u0 > 0.505) || fabsl(t) > 1 || !isnumberl(t))) { printf("Pure C sincospil_u05 cos arg=%.30Lg ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = sc.y, frx); if (u1 != 0 && ((fabsl(d) <= rangemax2 && 
u1 > 1.5) || fabsl(t) > 1 || !isnumberl(t))) { printf("Pure C sincospil_u35 cos arg=%.30Lg ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } #if 0 double2 sc = xsincos(d); double2 sc2 = xsincos_u1(d); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); double u0 = countULP(t = xsin(d), frx); if ((fabsl(d) <= rangemax && u0 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(sc.x, frx); if ((fabsl(d) <= rangemax && u1 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sincos sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP(t = xsin_u1(d), frx); if ((fabsl(d) <= rangemax && u2 > 1) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sin_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULP(t = sc2.x, frx); if ((fabsl(d) <= rangemax && u3 > 1) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); double u0 = countULP(t = xcos(d), frx); if ((fabsl(d) <= rangemax && u0 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = sc.y, frx); if ((fabsl(d) <= rangemax && u1 > 3.5) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sincos cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP(t = xcos_u1(d), frx); if ((fabsl(d) <= rangemax && u2 > 1) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C cos_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULP(t = sc2.y, frx); if ((fabsl(d) <= rangemax && u3 > 1) || fabsl(t) > 1 || !isnumberl(t)) { printf("Pure C sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); double u0 = countULP(t = xtan(d), frx); if 
((fabsl(d) < 1e+7 && u0 > 3.5) || (fabsl(d) <= rangemax && u0 > 5) || isnan(t)) { printf("Pure C tan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xtan_u1(d), frx); if ((fabsl(d) <= rangemax && u1 > 1) || isnan(t)) { printf("Pure C tan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } d = rnd_fr(); double d2 = rnd_fr(), zo = rnd_zo(); { mpfr_set_d(frx, fabsl(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog(fabsl(d)), frx); if (u0 > 3.5) { printf("Pure C log arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xlog_u1(fabsl(d)), frx); if (u1 > 1) { printf("Pure C log_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsl(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog10(fabsl(d)), frx); if (u0 > 1) { printf("Pure C log10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog1p(d), frx); if ((-1 <= d && d <= 1e+307 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { printf("Pure C log1p arg=%.20g ulp=%.20g\n", d, u0); printf("%g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp(d), frx); if (u0 > 1) { printf("Pure C exp arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp2(d), frx); if (u0 > 1) { printf("Pure C exp2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp10(d), frx); if (u0 > 1) { printf("Pure C exp10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULP(t = xexpm1(d), frx); if (u0 > 1) { printf("Pure 
C expm1 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULP(t = xpow(d2, d), frx); if (u0 > 1) { printf("Pure C pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULP(t = xcbrt(d), frx); if (u0 > 3.5) { printf("Pure C cbrt arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xcbrt_u1(d), frx); if (u1 > 1) { printf("Pure C cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULP(t = xasin(zo), frx); if (u0 > 3.5) { printf("Pure C asin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xasin_u1(zo), frx); if (u1 > 1) { printf("Pure C asin_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULP(t = xacos(zo), frx); if (u0 > 3.5) { printf("Pure C acos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xacos_u1(zo), frx); if (u1 > 1) { printf("Pure C acos_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULP(t = xatan(d), frx); if (u0 > 3.5) { printf("Pure C atan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xatan_u1(d), frx); if (u1 > 1) { printf("Pure C atan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULP(t = xatan2(d2, d), frx); if (u0 > 3.5) { printf("Pure C atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = xatan2_u1(d2, d), frx); if (u1 > 1) { 
printf("Pure C atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULP(t = xsinh(d), frx); if ((fabsl(d) <= 709 && u0 > 1) || (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C sinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULP(t = xcosh(d), frx); if ((fabsl(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf("Pure C cosh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULP(t = xtanh(d), frx); if (u0 > 1) { printf("Pure C tanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULP(t = xasinh(d), frx); if ((fabsl(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C asinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULP(t = xacosh(d), frx); if ((fabsl(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !isnan(t))) { printf("Pure C acosh arg=%.20g ulp=%.20g\n", d, u0); printf("%.20g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULP(t = xatanh(d), frx); if (u0 > 1) { printf("Pure C atanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } #endif } } sleef-3.3.1/src/libm-tester/tester2qp.c000066400000000000000000000356441333715643700177570ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #include #define _GNU_SOURCE #include #include #include #include "sleef.h" #include "f128util.h" #define DORENAME #include "rename.h" #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) int isnumberq(Sleef_quad x) { return !isinfq(x) && !isnanq(x); } int isPlusZeroq(Sleef_quad x) { return x == 0 && copysignq(1, x) == 1; } int isMinusZeroq(Sleef_quad x) { return x == 0 && copysignq(1, x) == -1; } mpfr_t fra, frb, frc, frd; double countULP(Sleef_quad d, mpfr_t c) { Sleef_quad c2 = mpfr_get_f128(c, GMP_RNDN); if (c2 == 0 && d != 0) return 10000; //if (isPlusZeroq(c2) && !isPlusZeroq(d)) return 10003; //if (isMinusZeroq(c2) && !isMinusZeroq(d)) return 10004; if (isnanq(c2) && isnanq(d)) return 0; if (isnanq(c2) || isnanq(d)) return 10001; if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) return 0; if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; if (!isnumberq(c2) && !isnumberq(d)) return 0; int e; frexpq(mpfr_get_f128(c, GMP_RNDN), &e); mpfr_set_f128(frb, fmaxq(ldexpq(1.0, e-113), FLT128_DENORM_MIN), GMP_RNDN); mpfr_set_f128(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); return u; } double countULP2(Sleef_quad d, mpfr_t c) { Sleef_quad c2 = mpfr_get_f128(c, GMP_RNDN); if (c2 == 0 && d != 0) return 10000; //if (isPlusZeroq(c2) && !isPlusZeroq(d)) return 10003; //if (isMinusZeroq(c2) && !isMinusZeroq(d)) return 10004; if (isnanq(c2) && isnanq(d)) return 0; if (isnanq(c2) || isnanq(d)) return 10001; if (c2 == POSITIVE_INFINITY 
&& d == POSITIVE_INFINITY) return 0; if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) return 0; if (!isnumberq(c2) && !isnumberq(d)) return 0; int e; frexpq(mpfr_get_f128(c, GMP_RNDN), &e); mpfr_set_f128(frb, fmaxq(ldexpq(1.0, e-113), FLT128_MIN), GMP_RNDN); mpfr_set_f128(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); return u; } typedef union { Sleef_quad d; __int128 u128; uint64_t u[2]; } conv_t; Sleef_quad rnd() { conv_t c; switch(random() & 15) { case 0: return INFINITY; case 1: return -INFINITY; } syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); return c.d; } Sleef_quad rnd_fr() { conv_t c; do { syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); } while(!isnumberq(c.d)); return c.d; } Sleef_quad rnd_zo() { conv_t c; do { syscall(SYS_getrandom, &c.u128, sizeof(c.u128), 0); } while(!isnumberq(c.d) || c.d < -1 || 1 < c.d); return c.d; } void sinpifr(mpfr_t ret, Sleef_quad d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_f128(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_sin(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } void cospifr(mpfr_t ret, Sleef_quad d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_f128(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_cos(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(2048); mpfr_inits(fra, frb, frc, frd, frw, frx, fry, frz, NULL); conv_t cd; Sleef_quad d, t, d2, zo; int cnt, ecnt = 0; srandom(time(NULL)); #if 0 cd.d = M_PIq; mpfr_set_f128(frx, cd.d, GMP_RNDN); cd.u128 += 3; printf("%g\n", countULP2(cd.d, frx)); #endif const Sleef_quad rangemax = 1e+9; for(cnt = 0;ecnt < 1000;cnt++) { switch(cnt 
& 7) { case 0: d = rnd(); d2 = rnd(); zo = rnd(); break; case 1: cd.d = rint((2 * (double)random() / RAND_MAX - 1) * 1e+10) * M_PI_4; cd.u128 += (random() & 0xff) - 0x7f; d = cd.d; d2 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); zo = rnd_zo(); break; } Sleef_quad2 sc = xsincospiq_u05(d); Sleef_quad2 sc2 = xsincospiq_u35(d); { const double rangemax2 = 1e+9; sinpifr(frx, d); double u0 = countULP2(t = sc.x, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumberq(t))) { printf("Pure C sincospiq_u05 sin arg="); printf128(d); printf(" ulp=%.20g\n", u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = sc2.x, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumberq(t))) { printf("Pure C sincospiq_u35 sin arg=%.30Lg ulp=%.20g\n", (long double)d, u1); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+9; cospifr(frx, d); double u0 = countULP2(t = sc.y, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumberq(t))) { printf("Pure C sincospiq_u05 cos arg=%.30Lg ulp=%.20g\n", (long double)d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = sc.y, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumberq(t))) { printf("Pure C sincospiq_u35 cos arg=%.30Lg ulp=%.20g\n", (long double)d, u1); fflush(stdout); ecnt++; } } #if 0 double2 sc = xsincos(d); double2 sc2 = xsincos_u1(d); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); double u0 = countULP(t = xsin(d), frx); if ((fabs(d) <= rangemax && u0 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(sc.x, frx); if ((fabs(d) <= rangemax && u1 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sincos sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP(t = xsin_u1(d), frx); if ((fabs(d) <= rangemax && u2 > 1) || fabs(t) > 1 || !isnumberq(t)) { 
printf("Pure C sin_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULP(t = sc2.x, frx); if ((fabs(d) <= rangemax && u3 > 1) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); double u0 = countULP(t = xcos(d), frx); if ((fabs(d) <= rangemax && u0 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = sc.y, frx); if ((fabs(d) <= rangemax && u1 > 3.5) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sincos cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP(t = xcos_u1(d), frx); if ((fabs(d) <= rangemax && u2 > 1) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C cos_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULP(t = sc2.y, frx); if ((fabs(d) <= rangemax && u3 > 1) || fabs(t) > 1 || !isnumberq(t)) { printf("Pure C sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); double u0 = countULP(t = xtan(d), frx); if ((fabs(d) < 1e+7 && u0 > 3.5) || (fabs(d) <= rangemax && u0 > 5) || isnan(t)) { printf("Pure C tan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xtan_u1(d), frx); if ((fabs(d) <= rangemax && u1 > 1) || isnan(t)) { printf("Pure C tan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } d = rnd_fr(); double d2 = rnd_fr(), zo = rnd_zo(); { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog(fabs(d)), frx); if (u0 > 3.5) { printf("Pure C log arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xlog_u1(fabs(d)), frx); if (u1 > 1) { printf("Pure C log_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); 
mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog10(fabs(d)), frx); if (u0 > 1) { printf("Pure C log10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULP(t = xlog1p(d), frx); if ((-1 <= d && d <= 1e+307 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { printf("Pure C log1p arg=%.20g ulp=%.20g\n", d, u0); printf("%g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp(d), frx); if (u0 > 1) { printf("Pure C exp arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp2(d), frx); if (u0 > 1) { printf("Pure C exp2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULP(t = xexp10(d), frx); if (u0 > 1) { printf("Pure C exp10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULP(t = xexpm1(d), frx); if (u0 > 1) { printf("Pure C expm1 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULP(t = xpow(d2, d), frx); if (u0 > 1) { printf("Pure C pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULP(t = xcbrt(d), frx); if (u0 > 3.5) { printf("Pure C cbrt arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xcbrt_u1(d), frx); if (u1 > 1) { printf("Pure C cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULP(t = xasin(zo), frx); if (u0 > 3.5) { 
printf("Pure C asin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xasin_u1(zo), frx); if (u1 > 1) { printf("Pure C asin_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULP(t = xacos(zo), frx); if (u0 > 3.5) { printf("Pure C acos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xacos_u1(zo), frx); if (u1 > 1) { printf("Pure C acos_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULP(t = xatan(d), frx); if (u0 > 3.5) { printf("Pure C atan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP(t = xatan_u1(d), frx); if (u1 > 1) { printf("Pure C atan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULP(t = xatan2(d2, d), frx); if (u0 > 3.5) { printf("Pure C atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2(t = xatan2_u1(d2, d), frx); if (u1 > 1) { printf("Pure C atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULP(t = xsinh(d), frx); if ((fabs(d) <= 709 && u0 > 1) || (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C sinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULP(t = xcosh(d), frx); if ((fabs(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf("Pure C cosh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULP(t = xtanh(d), frx); if (u0 > 
1) { printf("Pure C tanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULP(t = xasinh(d), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C asinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULP(t = xacosh(d), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !isnan(t))) { printf("Pure C acosh arg=%.20g ulp=%.20g\n", d, u0); printf("%.20g\n", t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULP(t = xatanh(d), frx); if (u0 > 1) { printf("Pure C atanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } #endif } } sleef-3.3.1/src/libm-tester/tester2simddp.c000066400000000000000000000706151333715643700206140ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #endif #include "sleef.h" #include "testerutil.h" #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #include "renamesse2.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #include "helpersse2.h" #include "renamesse4.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #include "renameavx.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #include "renamefma4.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #include "renameavx2.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #include "renameavx2128.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #include "renameavx512f.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_VECEXT #define CONFIG 1 #include "helpervecext.h" #include "norename.h" #endif #ifdef ENABLE_PUREC #define CONFIG 1 #include "helperpurec.h" #include "norename.h" #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #include "renameadvsimd.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif #ifdef ENABLE_SVE #define CONFIG 1 #include 
"helpersve.h" #include "renamesve.h" typedef Sleef_svfloat64_t_2 vdouble2; typedef Sleef_svfloat32_t_2 vfloat2; #endif /* ENABLE_SVE */ #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #include "renamevsx.h" typedef Sleef_vector_double_2 vdouble2; typedef Sleef_vector_float_2 vfloat2; #endif // #define DENORMAL_DBL_MIN (4.9406564584124654418e-324) #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) typedef union { double d; uint64_t u64; int64_t i64; } conv_t; double nexttoward0(double x, int n) { union { double f; uint64_t u; } cx; cx.f = x; cx.u -= n; return cx.f; } double rnd() { conv_t c; switch(random() & 63) { case 0: return nexttoward0( 0.0, -(random() & ((1 << (random() & 31)) - 1))); case 1: return nexttoward0(-0.0, -(random() & ((1 << (random() & 31)) - 1))); case 2: return nexttoward0( INFINITY, (random() & ((1 << (random() & 31)) - 1))); case 3: return nexttoward0(-INFINITY, (random() & ((1 << (random() & 31)) - 1))); } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif return c.d; } double rnd_fr() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif } while(!isnumber(c.d)); return c.d; } double rnd_zo() { conv_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u64, sizeof(c.u64), 0); #else c.u64 = random() | ((uint64_t)random() << 31) | ((uint64_t)random() << 62); #endif } while(!isnumber(c.d) || c.d < -1 || 1 < c.d); return c.d; } void sinpifr(mpfr_t ret, double d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_d(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_sin(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } void cospifr(mpfr_t 
ret, double d) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_set_d(frd, d, GMP_RNDN); mpfr_mul(frd, frpi, frd, GMP_RNDN); mpfr_cos(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } vdouble vset(vdouble v, int idx, double d) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); a[idx] = d; return vloadu_vd_p(a); } double vget(vdouble v, int idx) { double a[VECTLENDP]; vstoreu_v_p_vd(a, v); return a[idx]; } int vgeti(vint v, int idx) { int a[VECTLENDP*2]; vstoreu_v_p_vi(a, v); return a[idx]; } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(256); mpfr_inits(frw, frx, fry, frz, NULL); conv_t cd; double d, t; double d2, d3, zo; vdouble vd = vcast_vd_d(0); vdouble vd2 = vcast_vd_d(0); vdouble vd3 = vcast_vd_d(0); vdouble vzo = vcast_vd_d(0); vdouble vad = vcast_vd_d(0); vdouble2 sc, sc2; int cnt, ecnt = 0; srandom(time(NULL)); for(cnt = 0;ecnt < 1000;cnt++) { int e = cnt % VECTLENDP; switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 1: cd.d = rint(rnd_zo() * 1e+10) * M_PI_4; cd.i64 += (random() & 0xff) - 0x7f; d = cd.d; d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 2: cd.d = rnd_fr() * M_PI_4; cd.i64 += (random() & 0xf) - 0x7; d = cd.d; d2 = rnd(); d3 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); d3 = rnd_fr(); zo = rnd_zo(); break; } vd = vset(vd, e, d); vd2 = vset(vd2, e, d2); vd3 = vset(vd3, e, d3); vzo = vset(vzo, e, zo); vad = vset(vad, e, fabs(d)); // sc = xsincospi_u05(vd); sc2 = xsincospi_u35(vd); { const double rangemax2 = 1e+9/4; sinpifr(frx, d); double u0 = countULP2dp(t = vget(sc.x, e), frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospi_u05 sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = vget(sc2.x, e), frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 1.5) 
|| fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospi_u35 sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2dp(t = vget(xsinpi_u05(vd), e), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sinpi_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+9/4; cospifr(frx, d); double u0 = countULP2dp(t = vget(sc.y, e), frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospi_u05 cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = vget(sc.y, e), frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 1.5) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospi_u35 cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2dp(t = vget(xcospi_u05(vd), e), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cospi_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } sc = xsincos(vd); sc2 = xsincos_u1(vd); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsin(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(sc.x, e), frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincos sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULPdp(t = vget(xsin_u1(vd), e), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sin_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULPdp(t = vget(sc2.x, e), frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincos_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { 
mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xcos(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(sc.y, e), frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincos cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULPdp(t = vget(xcos_u1(vd), e), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cos_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } double u3 = countULPdp(t = vget(sc2.y, e), frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincos_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xtan(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || isnan(t))) { printf(ISANAME " tan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xtan_u1(vd), e), frx); if (u1 != 0 && (u1 > 1 || isnan(t))) { printf(ISANAME " tan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlog(vad), e), frx); if (u0 > 3.5) { printf(ISANAME " log arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xlog_u1(vad), e), frx); if (u1 > 1) { printf(ISANAME " log_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlog10(vad), e), frx); if (u0 > 1) { printf(ISANAME " log10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabs(d), GMP_RNDN); mpfr_log2(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlog2(vad), e), frx); if (u0 > 1) { printf(ISANAME " 
log2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlog1p(vd), e), frx); if ((-1 <= d && d <= 1e+307 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+307 && !(u0 <= 1 || isinf(t)))) { printf(ISANAME " log1p arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xexp(vd), e), frx); if (u0 > 1) { printf(ISANAME " exp arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xexp2(vd), e), frx); if (u0 > 1) { printf(ISANAME " exp2 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xexp10(vd), e), frx); if (u0 > 1.09) { printf(ISANAME " exp10 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xexpm1(vd), e), frx); if (u0 > 1) { printf(ISANAME " expm1 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xpow(vd2, vd), e), frx); if (u0 > 1) { printf(ISANAME " pow arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xcbrt(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " cbrt arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xcbrt_u1(vd), e), frx); if (u1 > 1) { printf(ISANAME " cbrt_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xasin(vzo), e), frx); if (u0 > 3.5) { 
printf(ISANAME " asin arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xasin_u1(vzo), e), frx); if (u1 > 1) { printf(ISANAME " asin_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xacos(vzo), e), frx); if (u0 > 3.5) { printf(ISANAME " acos arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xacos_u1(vzo), e), frx); if (u1 > 1) { printf(ISANAME " acos_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xatan(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " atan arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPdp(t = vget(xatan_u1(vd), e), frx); if (u1 > 1) { printf(ISANAME " atan_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xatan2(vd2, vd), e), frx); if (u0 > 3.5) { printf(ISANAME " atan2 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2dp(t = vget(xatan2_u1(vd2, vd), e), frx); if (u1 > 1) { printf(ISANAME " atan2_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsinh(vd), e), frx); if ((fabs(d) <= 709 && u0 > 1) || (d > 709 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf(ISANAME " sinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xcosh(vd), e), frx); if ((fabs(d) <= 709 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf(ISANAME " cosh arg=%.20g ulp=%.20g\n", d, u0); 
fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xtanh(vd), e), frx); if (u0 > 1) { printf(ISANAME " tanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsinh_u35(vd), e), frx); if ((fabs(d) <= 709 && u0 > 3.5) || (d > 709 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || (d < -709 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { printf(ISANAME " sinh_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xcosh_u35(vd), e), frx); if ((fabs(d) <= 709 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { printf(ISANAME " cosh_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xtanh_u35(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " tanh_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xasinh(vd), e), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf(ISANAME " asinh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xacosh(vd), e), frx); if ((fabs(d) < sqrt(DBL_MAX) && u0 > 1) || (d >= sqrt(DBL_MAX) && !(u0 <= 1 || (isinf(t) && t > 0))) || (d <= -sqrt(DBL_MAX) && !isnan(t))) { printf(ISANAME " acosh arg=%.20g ulp=%.20g\n", d, u0); 
fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xatanh(vd), e), frx); if (u0 > 1) { printf(ISANAME " atanh arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } // { mpfr_set_d(frx, d, GMP_RNDN); mpfr_abs(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xfabs(vd), e), frx); if (u0 != 0) { printf(ISANAME " fabs arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_copysign(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xcopysign(vd, vd2), e), frx); if (u0 != 0 && !isnan(d2)) { printf(ISANAME " copysign arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_max(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xfmax(vd, vd2), e), frx); if (u0 != 0) { printf(ISANAME " fmax arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_min(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xfmin(vd, vd2), e), frx); if (u0 != 0) { printf(ISANAME " fmin arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_dim(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xfdim(vd, vd2), e), frx); if (u0 > 0.5) { printf(ISANAME " fdim arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_trunc(frx, frx); double u0 = countULPdp(t = vget(xtrunc(vd), 
e), frx); if (u0 != 0) { printf(ISANAME " trunc arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_floor(frx, frx); double u0 = countULPdp(t = vget(xfloor(vd), e), frx); if (u0 != 0) { printf(ISANAME " floor arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_ceil(frx, frx); double u0 = countULPdp(t = vget(xceil(vd), e), frx); if (u0 != 0) { printf(ISANAME " ceil arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_round(frx, frx); double u0 = countULPdp(t = vget(xround(vd), e), frx); if (u0 != 0) { printf(ISANAME " round arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_rint(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xrint(vd), e), frx); if (u0 != 0) { printf(ISANAME " rint arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_set_d(frz, d3, GMP_RNDN); mpfr_fma(frx, frx, fry, frz, GMP_RNDN); double u0 = countULP2dp(t = vget(xfma(vd, vd2, vd3), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+303 < c && c < 1e+303 && u0 > 0.5) || !(u0 <= 0.5 || isinf(t))) { printf(ISANAME " fma arg=%.20g, %.20g, %.20g ulp=%.20g\n", d, d2, d3, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsqrt(vd), e), frx); if (u0 > 1.0) { printf(ISANAME " sqrt arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, 
test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsqrt_u05(vd), e), frx); if (u0 > 0.50001) { printf(ISANAME " sqrt_u05 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xsqrt_u35(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " sqrt_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2dp(t = vget(xhypot_u05(vd, vd2), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 > 0.5) { printf(ISANAME " hypot_u05 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2dp(t = vget(xhypot_u35(vd, vd2), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+308 < c && c < 1e+308 && u0 > 3.5) || !(u0 <= 3.5 || isinf(t))) { if (!(isinf(c) && t == 1.7976931348623157081e+308)) { printf(ISANAME " hypot_u35 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } { t = vget(xnextafter(vd, vd2), e); double c = nextafter(d, d2); if (!(isnan(t) && isnan(c)) && t != c) { printf(ISANAME " nextafter arg=%.20g, %.20g\n", d, d2); printf("correct = %.20g, test = %.20g\n", c, t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, 0); double u0 = countULPdp(t = vget(xfrfrexp(vd), e), frx); if (d != 0 && isnumber(d) && u0 != 0) { printf(ISANAME " frfrexp arg=%.20g 
ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_fmod(frx, frx, fry, GMP_RNDN); double u0 = countULPdp(t = vget(xfmod(vd, vd2), e), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabsl((long double)d / d2) < 1e+300 && u0 > 0.5) { printf(ISANAME " fmod arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } /* { mpfr_set_d(frx, d, GMP_RNDN); int cexp = mpfr_get_exp(frx); int texp = vgeti(xexpfrexp(vd), e); if (isnumber(d) && cexp != texp) { printf(ISANAME " expfrexp arg=%.20g\n", d); fflush(stdout); ecnt++; } } { int exp = (random() & 8191) - 4096; mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) + exp); double u0 = countULPdp(t = vget(xldexp(d, exp), e), frx); if (u0 > 0.5) { printf(ISANAME " ldexp arg=%.20g %d ulp=%.20g\n", d, exp, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } */ { mpfr_set_d(frx, d, GMP_RNDN); mpfr_modf(fry, frz, frx, GMP_RNDN); vdouble2 t2 = xmodf(vd); double u0 = countULPdp(vget(t2.x, e), frz); double u1 = countULPdp(vget(t2.y, e), fry); if (u0 != 0 || u1 != 0) { printf(ISANAME " modf arg=%.20g ulp=%.20g %.20g\n", d, u0, u1); printf("correct = %.20g, %.20g\n", mpfr_get_d(frz, GMP_RNDN), mpfr_get_d(fry, GMP_RNDN)); printf("test = %.20g, %.20g\n", vget(t2.x, e), vget(t2.y, e)); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int s; mpfr_lgamma(frx, &s, frx, GMP_RNDN); double u0 = countULPdp(t = vget(xlgamma_u1(vd), e), frx); if (((d < 0 && fabsl(t - mpfr_get_ld(frx, GMP_RNDN)) > 1e-15 && u0 > 1) || (0 <= d && d < 2e+305 && u0 > 1) || (2e+305 <= d && !(u0 <= 1 || isinf(t))))) { printf("Pure C xlgamma_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } 
} { mpfr_set_d(frx, d, GMP_RNDN); mpfr_gamma(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = vget(xtgamma_u1(vd), e), frx); if (u0 > 1.0) { printf("Pure C xtgamma_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erfc(frx, frx, GMP_RNDN); static double ebz = 9.8813129168249308835e-324; // nextafter(nextafter(0, 1), 1); double u0 = countULP2dp(t = vget(xerfc_u15(vd), e), frx); if ((d > 26.2 && u0 > 2.5 && !(mpfr_get_d(frx, GMP_RNDN) == 0 && t <= ebz)) || (d <= 26.2 && u0 > 1.5)) { printf("Pure C xerfc_u15 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erf(frx, frx, GMP_RNDN); double u0 = countULP2dp(t = vget(xerf_u1(vd), e), frx); if (u0 > 1) { printf("Pure C xerf_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } } sleef-3.3.1/src/libm-tester/tester2simdsp.c000066400000000000000000000672431333715643700206360ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #include #endif #include "sleef.h" #include "testerutil.h" #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #include "renamesse2.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #include "helpersse2.h" #include "renamesse4.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #include "renameavx.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #include "renamefma4.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #include "renameavx2.h" typedef Sleef___m256d_2 vdouble2; typedef Sleef___m256_2 vfloat2; #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #include "renameavx2128.h" typedef Sleef___m128d_2 vdouble2; typedef Sleef___m128_2 vfloat2; #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #include "renameavx512f.h" typedef Sleef___m512d_2 vdouble2; typedef Sleef___m512_2 vfloat2; #endif #ifdef ENABLE_VECEXT #define CONFIG 1 #include "helpervecext.h" #include "norename.h" #endif #ifdef ENABLE_PUREC #define CONFIG 1 #include "helperpurec.h" #include "norename.h" #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #include "renameadvsimd.h" typedef Sleef_float64x2_t_2 vdouble2; typedef Sleef_float32x4_t_2 vfloat2; #endif #ifdef ENABLE_SVE #define CONFIG 1 #include 
"helpersve.h" #include "renamesve.h" typedef Sleef_svfloat64_t_2 vdouble2; typedef Sleef_svfloat32_t_2 vfloat2; #endif /* ENABLE_SVE */ #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #include "renamevsx.h" typedef Sleef_vector_double_2 vdouble2; typedef Sleef_vector_float_2 vfloat2; #endif // #define DENORMAL_FLT_MIN (1.4012984643248170709e-45f) #define POSITIVE_INFINITYf ((float)INFINITY) #define NEGATIVE_INFINITYf (-(float)INFINITY) typedef union { double d; uint64_t u64; int64_t i64; } conv64_t; typedef union { float f; uint32_t u32; int32_t i32; } conv32_t; static float nexttoward0f(float x, int n) { union { float f; int32_t u; } cx; cx.f = x; cx.u -= n; return x == 0 ? 0 : cx.f; } float rnd() { conv32_t c; switch(random() & 63) { case 0: return nexttoward0f( 0.0, -(random() & ((1 << (random() & 31)) - 1))); case 1: return nexttoward0f(-0.0, -(random() & ((1 << (random() & 31)) - 1))); case 2: return nexttoward0f( INFINITY, (random() & ((1 << (random() & 31)) - 1))); case 3: return nexttoward0f(-INFINITY, (random() & ((1 << (random() & 31)) - 1))); } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif return c.f; } float rnd_fr() { conv32_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif } while(!isnumber(c.f)); return c.f; } float rnd_zo() { conv32_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif } while(!isnumber(c.f) || c.f < -1 || 1 < c.f); return c.f; } vfloat vset(vfloat v, int idx, float d) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); a[idx] = d; return vloadu_vf_p(a); } float vget(vfloat v, int idx) { float a[VECTLENSP]; vstoreu_v_p_vf(a, v); return a[idx]; } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; 
mpfr_set_default_prec(256); mpfr_inits(frw, frx, fry, frz, NULL); conv32_t cd; float d, t; float d2, d3, zo; vfloat vd = vcast_vf_f(0); vfloat vd2 = vcast_vf_f(0); vfloat vd3 = vcast_vf_f(0); vfloat vzo = vcast_vf_f(0); vfloat vad = vcast_vf_f(0); vfloat2 sc, sc2; int cnt, ecnt = 0; srandom(time(NULL)); for(cnt = 0;ecnt < 1000;cnt++) { int e = cnt % VECTLENSP; switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 1: cd.f = rint(rnd_zo() * 1e+10) * M_PI_4; cd.i32 += (random() & 0xff) - 0x7f; d = cd.f; d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 2: cd.f = rnd_fr() * M_PI_4; cd.i32 += (random() & 0xf) - 0x7; d = cd.f; d2 = rnd(); d3 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); d3 = rnd_fr(); zo = rnd_zo(); break; } vd = vset(vd, e, d); vd2 = vset(vd2, e, d2); vd3 = vset(vd3, e, d3); vzo = vset(vzo, e, zo); vad = vset(vad, e, fabs(d)); sc = xsincospif_u05(vd); sc2 = xsincospif_u35(vd); { const double rangemax2 = 1e+7/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinpi(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(sc.x, e), frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospif_u05 sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = vget(sc2.x, e), frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospif_u35 sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2sp(t = vget(xsinpif_u05(vd), e), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sinpif_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } { const double rangemax2 = 1e+7/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_cospi(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(sc.y, e), frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " 
sincospif_u05 cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } /* The 3.5-ULP variant's result is held in sc2 (sc holds the 0.5-ULP result already checked above), so the u35 cosine check must read sc2.y. */ double u1 = countULP2sp(t = vget(sc2.y, e), frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincospif_u35 cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2sp(t = vget(xcospif_u05(vd), e), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cospif_u05 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } } /* From here on sc/sc2 hold the sincosf (3.5 ULP) and sincosf_u1 (1 ULP) results. */ sc = xsincosf(vd); sc2 = xsincosf_u1(vd); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); float u0 = countULPsp(t = vget(xsinf(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sinf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = vget(sc.x, e), frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincosf sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } float u2 = countULPsp(t = vget(xsinf_u1(vd), e), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sinf_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } float u3 = countULPsp(t = vget(sc2.x, e), frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincosf_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); float u0 = countULPsp(t = vget(xcosf(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " cosf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = vget(sc.y, e), frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincosf cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } float u2 = countULPsp(t = vget(xcosf_u1(vd), e), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME
" cosf_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } float u3 = countULPsp(t = vget(sc2.y, e), frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf(ISANAME " sincosf_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); float u0 = countULPsp(t = vget(xtanf(vd), e), frx); if (u0 != 0 && (u0 > 3.5 || isnan(t))) { printf(ISANAME " tanf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = vget(xtanf_u1(vd), e), frx); if (u1 != 0 && (u1 > 1 || isnan(t))) { printf(ISANAME " tanf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlogf(vad), e), frx); if (u0 > 3.5) { printf(ISANAME " logf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xlogf_u1(vad), e), frx); if (u1 > 1) { printf(ISANAME " logf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlog10f(vad), e), frx); if (u0 > 1) { printf(ISANAME " log10f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log2(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlog2f(vad), e), frx); if (u0 > 1) { printf(ISANAME " log2f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlog1pf(vd), e), frx); if ((-1 <= d && d <= 1e+38 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+38 && !(u0 <= 1 || isinf(t)))) { printf(ISANAME " log1pf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xexpf(vd), e), frx); if (u0 > 1) { printf(ISANAME " expf arg=%.20g ulp=%.20g\n", d, u0); 
fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xexp2f(vd), e), frx); if (u0 > 1) { printf(ISANAME " exp2f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xexp10f(vd), e), frx); if (u0 > 1) { printf(ISANAME " exp10f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xexpm1f(vd), e), frx); if (u0 > 1) { printf(ISANAME " expm1f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xpowf(vd2, vd), e), frx); if (u0 > 1) { printf(ISANAME " powf arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xcbrtf(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " cbrtf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xcbrtf_u1(vd), e), frx); if (u1 > 1) { printf(ISANAME " cbrtf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xasinf(vzo), e), frx); if (u0 > 3.5) { printf(ISANAME " asinf arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xasinf_u1(vzo), e), frx); if (u1 > 1) { printf(ISANAME " asinf_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xacosf(vzo), e), frx); if (u0 > 3.5) { printf(ISANAME " acosf arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = 
vget(xacosf_u1(vzo), e), frx); if (u1 > 1) { printf(ISANAME " acosf_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xatanf(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " atanf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = vget(xatanf_u1(vd), e), frx); if (u1 > 1) { printf(ISANAME " atanf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xatan2f(vd2, vd), e), frx); if (u0 > 3.5) { printf(ISANAME " atan2f arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = vget(xatan2f_u1(vd2, vd), e), frx); if (u1 > 1) { printf(ISANAME " atan2f_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsinhf(vd), e), frx); if ((fabs(d) <= 88.5 && u0 > 1) || (d > 88.5 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -88.5 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf(ISANAME " sinhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xcoshf(vd), e), frx); if ((fabs(d) <= 88.5 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf(ISANAME " coshf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xtanhf(vd), e), frx); if (u0 > 1.0001) { printf(ISANAME " tanhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsinhf_u35(vd), e), frx); if ((fabs(d) <= 88 && u0 > 3.5) || (d > 88 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || (d < -88 && 
!(u0 <= 3.5 || (isinf(t) && t < 0)))) { printf(ISANAME " sinhf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xcoshf_u35(vd), e), frx); if ((fabs(d) <= 88 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { printf(ISANAME " coshf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xtanhf_u35(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " tanhf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xasinhf(vd), e), frx); if ((fabs(d) < sqrt(FLT_MAX) && u0 > 1.0001) || (d >= sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t > 0))) || (d <= -sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t < 0)))) { printf(ISANAME " asinhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } /* acoshf: uses the C99 type-generic isinf() like every other check here; the non-standard isinff() is not available on all platforms. For d <= -sqrt(FLT_MAX) the result must be NaN. */ { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xacoshf(vd), e), frx); if ((fabs(d) < sqrt(FLT_MAX) && u0 > 1.0001) || (d >= sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t > 0))) || (d <= -sqrt(FLT_MAX) && !isnan(t))) { printf(ISANAME " acoshf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xatanhf(vd), e), frx); if (u0 > 1.0001) { printf(ISANAME " atanhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } // /* { int exp = (random() & 8191) - 4096; mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) + exp); double u0 = countULPsp(t = vget(xldexpf(d, exp)), frx); if (u0 > 0.5001) { printf("Pure C ldexpf arg=%.20g %d ulp=%.20g\n", d, exp, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } */ { mpfr_set_d(frx, d, GMP_RNDN); mpfr_abs(frx,
frx, GMP_RNDN); double u0 = countULPsp(t = vget(xfabsf(vd), e), frx); if (u0 != 0) { printf(ISANAME " fabsf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_copysign(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xcopysignf(vd, vd2), e), frx); if (u0 != 0 && !isnan(d2)) { printf(ISANAME " copysignf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_max(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xfmaxf(vd, vd2), e), frx); if (u0 != 0) { printf(ISANAME " fmaxf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_min(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xfminf(vd, vd2), e), frx); if (u0 != 0) { printf(ISANAME " fminf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_dim(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xfdimf(vd, vd2), e), frx); if (u0 > 0.5) { printf(ISANAME " fdimf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_trunc(frx, frx); double u0 = countULPsp(t = vget(xtruncf(vd), e), frx); if (u0 != 0) { printf(ISANAME " truncf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_floor(frx, frx); double u0 = countULPsp(t = vget(xfloorf(vd), 
e), frx); if (u0 != 0) { printf(ISANAME " floorf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_ceil(frx, frx); double u0 = countULPsp(t = vget(xceilf(vd), e), frx); if (u0 != 0) { printf(ISANAME " ceilf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_round(frx, frx); double u0 = countULPsp(t = vget(xroundf(vd), e), frx); if (u0 != 0) { printf(ISANAME " roundf arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_rint(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xrintf(vd), e), frx); if (u0 != 0) { printf(ISANAME " rintf arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_modf(fry, frz, frx, GMP_RNDN); vfloat2 t2 = xmodff(vd); double u0 = countULPsp(vget(t2.x, e), frz); double u1 = countULPsp(vget(t2.y, e), fry); if (u0 != 0 || u1 != 0) { printf(ISANAME " modff arg=%.20g ulp=%.20g %.20g\n", d, u0, u1); printf("correct = %.20g, %.20g\n", mpfr_get_d(frz, GMP_RNDN), mpfr_get_d(fry, GMP_RNDN)); printf("test = %.20g, %.20g\n", vget(t2.x, e), vget(t2.y, e)); fflush(stdout); ecnt++; } } { t = vget(xnextafterf(vd, vd2), e); double c = nextafterf(d, d2); if (!(isnan(t) && isnan(c)) && t != c) { printf(ISANAME " nextafterf arg=%.20g, %.20g\n", d, d2); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, 0); double u0 = countULPsp(t = vget(xfrfrexpf(vd), e), frx); if (d != 0 && isnumber(d) && u0 != 0) { printf(ISANAME " frfrexpf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } /* { mpfr_set_d(frx, d, GMP_RNDN); int cexp = mpfr_get_exp(frx); int texp = 
xexpfrexpf(d); if (d != 0 && isnumber(d) && cexp != texp) { printf(ISANAME " expfrexpf arg=%.20g\n", d); fflush(stdout); ecnt++; } } */ { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2sp(t = vget(xhypotf_u05(vd, vd2), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 > 0.5001) { printf(ISANAME " hypotf_u05 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2sp(t = vget(xhypotf_u35(vd, vd2), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 >= 3.5) { printf(ISANAME " hypotf_u35 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_fmod(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = vget(xfmodf(vd, vd2), e), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabs((double)d / d2) < 1e+38 && u0 > 0.5) { printf(ISANAME " fmodf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_set_d(frz, d3, GMP_RNDN); mpfr_fma(frx, frx, fry, frz, GMP_RNDN); double u0 = countULP2sp(t = vget(xfmaf(vd, vd2, vd3), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+34 < c && c < 1e+33 && u0 > 0.5001) || !(u0 <= 0.5001 || isinf(t))) { printf(ISANAME " fmaf arg=%.20g, %.20g, %.20g ulp=%.20g\n", d, d2, d3, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsqrtf(vd), e), frx); if (u0 > 1.0) { printf(ISANAME " sqrtf arg=%.20g 
ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsqrtf_u05(vd), e), frx); if (u0 > 0.5001) { printf(ISANAME " sqrtf_u05 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xsqrtf_u35(vd), e), frx); if (u0 > 3.5) { printf(ISANAME " sqrtf_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erfc(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(xerfcf_u15(vd), e), frx); if (u0 > 1.5) { printf(ISANAME " erfcf_u15 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erf(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(xerff_u1(vd), e), frx); if (u0 > 1.0) { printf(ISANAME " erff_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int s; mpfr_lgamma(frx, &s, frx, GMP_RNDN); double u0 = countULPsp(t = vget(xlgammaf_u1(vd), e), frx); if (((d < 0 && fabsl(t - mpfr_get_ld(frx, GMP_RNDN)) > 1e-8 && u0 > 1) || (0 <= d && d < 4e+36 && u0 > 1) || (4e+36 <= d && !(u0 <= 1 || isinf(t))))) { printf(ISANAME " xlgammaf_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", (float)mpfr_get_d(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_gamma(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = vget(xtgammaf_u1(vd), e), frx); double c = mpfr_get_d(frx, GMP_RNDN); if 
(isnumber(c) || isnumber(t)) { if (u0 > 1.0) { printf(ISANAME " xtgammaf_u1 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", (float)mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } #if 0 if (cnt % 1000 == 0) { printf("cnt = %d \r", cnt); fflush(stdout); } #endif } } sleef-3.3.1/src/libm-tester/tester2sp.c000066400000000000000000000606261333715643700177570ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #ifdef ENABLE_SYS_getrandom #define _GNU_SOURCE #include #include #include #endif #include "sleef.h" #include "testerutil.h" #define DORENAME #include "rename.h" #if defined(__APPLE__) static int isinff(float x) { return x == __builtin_inff() || x == -__builtin_inff(); } #endif #if defined(__FreeBSD__) #define isinff(x) ((x) == (float)(1e+300) || (x) == -(float)(1e+300)) #endif #define DENORMAL_FLT_MIN (1.4012984643248170709e-45f) #define POSITIVE_INFINITYf ((float)INFINITY) #define NEGATIVE_INFINITYf (-(float)INFINITY) typedef union { double d; uint64_t u64; int64_t i64; } conv64_t; typedef union { float f; uint32_t u32; int32_t i32; } conv32_t; static float nexttoward0f(float x, int n) { union { float f; int32_t u; } cx; cx.f = x; cx.u -= n; return x == 0 ? 
0 : cx.f; } float rnd() { conv32_t c; switch(random() & 63) { case 0: return nexttoward0f( 0.0, -(random() & ((1 << (random() & 31)) - 1))); case 1: return nexttoward0f(-0.0, -(random() & ((1 << (random() & 31)) - 1))); case 2: return nexttoward0f( INFINITY, (random() & ((1 << (random() & 31)) - 1))); case 3: return nexttoward0f(-INFINITY, (random() & ((1 << (random() & 31)) - 1))); } #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif return c.f; } float rnd_fr() { conv32_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif } while(!isnumber(c.f)); return c.f; } float rnd_zo() { conv32_t c; do { #ifdef ENABLE_SYS_getrandom syscall(SYS_getrandom, &c.u32, sizeof(c.u32), 0); #else c.u32 = (uint32_t)random() | ((uint32_t)random() << 31); #endif } while(!isnumber(c.f) || c.f < -1 || 1 < c.f); return c.f; } int main(int argc,char **argv) { mpfr_t frw, frx, fry, frz; mpfr_set_default_prec(256); mpfr_inits(frw, frx, fry, frz, NULL); conv32_t cd; float d, t; float d2, d3, zo; int cnt, ecnt = 0; srandom(time(NULL)); for(cnt = 0;ecnt < 1000;cnt++) { switch(cnt & 7) { case 0: d = rnd(); d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 1: cd.f = rint(rnd_zo() * 1e+10) * M_PI_4; cd.i32 += (random() & 0xff) - 0x7f; d = cd.f; d2 = rnd(); d3 = rnd(); zo = rnd(); break; case 2: cd.f = rnd_fr() * M_PI_4; cd.i32 += (random() & 0xf) - 0x7; d = cd.f; d2 = rnd(); d3 = rnd(); zo = rnd(); break; default: d = rnd_fr(); d2 = rnd_fr(); d3 = rnd_fr(); zo = rnd_zo(); break; } Sleef_float2 sc = xsincospif_u05(d); Sleef_float2 sc2 = xsincospif_u35(d); { const float rangemax2 = 1e+7/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinpi(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = sc.x, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospif_u05 
sin arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = sc2.x, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospif_u35 sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2sp(t = xsinpif_u05(d), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sinpif_u05 arg=%.20g ulp=%.20g\n", d, u2); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { const float rangemax2 = 1e+7/4; mpfr_set_d(frx, d, GMP_RNDN); mpfr_cospi(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = sc.y, frx); if (u0 != 0 && ((fabs(d) <= rangemax2 && u0 > 0.505) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospif_u05 cos arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } /* The u35 cosine comes from sc2 (mirrors the sc2.x sine check above); sc.y is the u05 result. */ double u1 = countULP2sp(t = sc2.y, frx); if (u1 != 0 && ((fabs(d) <= rangemax2 && u1 > 2.0) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincospif_u35 cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } double u2 = countULP2sp(t = xcospif_u05(d), frx); if (u2 != 0 && ((fabs(d) <= rangemax2 && u2 > 0.506) || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cospif_u05 arg=%.20g ulp=%.20g\n", d, u2); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } /* From here on sc/sc2 hold the sincosf and sincosf_u1 results. */ sc = xsincosf(d); sc2 = xsincosf_u1(d); { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sin(frx, frx, GMP_RNDN); float u0 = countULPsp(t = xsinf(d), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sinf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = sc.x, frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincosf sin arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } float u2 = countULPsp(t = xsinf_u1(d), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sinf_u1 arg=%.20g
ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } float u3 = countULPsp(t = sc2.x, frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincosf_u1 sin arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cos(frx, frx, GMP_RNDN); float u0 = countULPsp(t = xcosf(d), frx); if (u0 != 0 && (u0 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cosf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = sc.y, frx); if (u1 != 0 && (u1 > 3.5 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincosf cos arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } float u2 = countULPsp(t = xcosf_u1(d), frx); if (u2 != 0 && (u2 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C cosf_u1 arg=%.20g ulp=%.20g\n", d, u2); fflush(stdout); ecnt++; } float u3 = countULPsp(t = sc2.y, frx); if (u3 != 0 && (u3 > 1 || fabs(t) > 1 || !isnumber(t))) { printf("Pure C sincosf_u1 cos arg=%.20g ulp=%.20g\n", d, u3); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tan(frx, frx, GMP_RNDN); float u0 = countULPsp(t = xtanf(d), frx); if (u0 != 0 && (u0 > 3.5 || isnan(t))) { printf("Pure C tanf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } float u1 = countULPsp(t = xtanf_u1(d), frx); if (u1 != 0 && (u1 > 1 || isnan(t))) { printf("Pure C tanf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xlogf(fabsf(d)), frx); if (u0 > 3.5) { printf("Pure C logf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xlogf_u1(fabsf(d)), frx); if (u1 > 1) { printf("Pure C logf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log10(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xlog10f(fabsf(d)), frx); if (u0 > 1) { printf("Pure C log10f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; 
} } { mpfr_set_d(frx, fabsf(d), GMP_RNDN); mpfr_log2(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xlog2f(fabsf(d)), frx); if (u0 > 1) { printf("Pure C log2f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_log1p(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xlog1pf(d), frx); if ((-1 <= d && d <= 1e+38 && u0 > 1) || (d < -1 && !isnan(t)) || (d > 1e+38 && !(u0 <= 1 || isinf(t)))) { printf("Pure C log1pf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xexpf(d), frx); if (u0 > 1) { printf("Pure C expf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp2(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xexp2f(d), frx); if (u0 > 1) { printf("Pure C exp2f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_exp10(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xexp10f(d), frx); if (u0 > 1) { printf("Pure C exp10f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_expm1(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xexpm1f(d), frx); if (u0 > 1) { printf("Pure C expm1f arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_pow(frx, fry, frx, GMP_RNDN); double u0 = countULPsp(t = xpowf(d2, d), frx); if (u0 > 1) { printf("Pure C powf arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cbrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xcbrtf(d), frx); if (u0 > 3.5) { printf("Pure C cbrtf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xcbrtf_u1(d), frx); if (u1 > 1) { printf("Pure C cbrtf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { 
mpfr_set_d(frx, zo, GMP_RNDN); mpfr_asin(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xasinf(zo), frx); if (u0 > 3.5) { printf("Pure C asinf arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xasinf_u1(zo), frx); if (u1 > 1) { printf("Pure C asinf_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, zo, GMP_RNDN); mpfr_acos(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xacosf(zo), frx); if (u0 > 3.5) { printf("Pure C acosf arg=%.20g ulp=%.20g\n", zo, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xacosf_u1(zo), frx); if (u1 > 1) { printf("Pure C acosf_u1 arg=%.20g ulp=%.20g\n", zo, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atan(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xatanf(d), frx); if (u0 > 3.5) { printf("Pure C atanf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } double u1 = countULPsp(t = xatanf_u1(d), frx); if (u1 > 1) { printf("Pure C atanf_u1 arg=%.20g ulp=%.20g\n", d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_atan2(frx, fry, frx, GMP_RNDN); double u0 = countULPsp(t = xatan2f(d2, d), frx); if (u0 > 3.5) { printf("Pure C atan2f arg=%.20g, %.20g ulp=%.20g\n", d2, d, u0); fflush(stdout); ecnt++; } double u1 = countULP2sp(t = xatan2f_u1(d2, d), frx); if (u1 > 1) { printf("Pure C atan2f_u1 arg=%.20g, %.20g ulp=%.20g\n", d2, d, u1); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xsinhf(d), frx); if ((fabs(d) <= 88.5 && u0 > 1) || (d > 88.5 && !(u0 <= 1 || (isinf(t) && t > 0))) || (d < -88.5 && !(u0 <= 1 || (isinf(t) && t < 0)))) { printf("Pure C sinhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xcoshf(d), frx); if ((fabs(d) <= 88.5 && u0 > 1) || !(u0 <= 1 || (isinf(t) && t > 0))) { printf("Pure C coshf 
arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xtanhf(d), frx); if (u0 > 1.0001) { printf("Pure C tanhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xsinhf_u35(d), frx); if ((fabs(d) <= 88 && u0 > 3.5) || (d > 88 && !(u0 <= 3.5 || (isinf(t) && t > 0))) || (d < -88 && !(u0 <= 3.5 || (isinf(t) && t < 0)))) { printf("Pure C sinhf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_cosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xcoshf_u35(d), frx); if ((fabs(d) <= 88 && u0 > 3.5) || !(u0 <= 3.5 || (isinf(t) && t > 0))) { printf("Pure C coshf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_tanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xtanhf_u35(d), frx); if (u0 > 3.5) { printf("Pure C tanhf_u35 arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_asinh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xasinhf(d), frx); if ((fabs(d) < sqrt(FLT_MAX) && u0 > 1.0001) || (d >= sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t > 0))) || (d <= -sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinf(t) && t < 0)))) { printf("Pure C asinhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_acosh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xacoshf(d), frx); if ((fabs(d) < sqrt(FLT_MAX) && u0 > 1.0001) || (d >= sqrt(FLT_MAX) && !(u0 <= 1.0001 || (isinff(t) && t > 0))) || (d <= -sqrt(FLT_MAX) && !isnan(t))) { printf("Pure C acoshf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_atanh(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xatanhf(d), frx); if (u0 > 1.0001) { printf("Pure C atanhf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); 
ecnt++; } } // { int exp = (random() & 8191) - 4096; mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, mpfr_get_exp(frx) + exp); double u0 = countULPsp(t = xldexpf(d, exp), frx); if (u0 > 0.5002) { printf("Pure C ldexpf arg=%.20g %d ulp=%.20g\n", d, exp, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_abs(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xfabsf(d), frx); if (u0 != 0) { printf("Pure C fabsf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_copysign(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xcopysignf(d, d2), frx); if (u0 != 0 && !isnan(d2)) { printf("Pure C copysignf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %g, test = %g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_max(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xfmaxf(d, d2), frx); if (u0 != 0) { printf("Pure C fmaxf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_min(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xfminf(d, d2), frx); if (u0 != 0) { printf("Pure C fminf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_dim(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xfdimf(d, d2), frx); if (u0 > 0.5) { printf("Pure C fdimf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); 
mpfr_trunc(frx, frx); double u0 = countULPsp(t = xtruncf(d), frx); if (u0 != 0) { printf("Pure C truncf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_floor(frx, frx); double u0 = countULPsp(t = xfloorf(d), frx); if (u0 != 0) { printf("Pure C floorf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_ceil(frx, frx); double u0 = countULPsp(t = xceilf(d), frx); if (u0 != 0) { printf("Pure C ceilf arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_round(frx, frx); double u0 = countULPsp(t = xroundf(d), frx); if (u0 != 0) { printf("Pure C roundf arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_rint(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xrintf(d), frx); if (u0 != 0) { printf("Pure C rintf arg=%.24g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_modf(fry, frz, frx, GMP_RNDN); Sleef_float2 t2 = xmodff(d); double u0 = countULPsp(t2.x, frz); double u1 = countULPsp(t2.y, fry); if (u0 != 0 || u1 != 0) { printf("Pure C modff arg=%.20g ulp=%.20g %.20g\n", d, u0, u1); printf("correct = %.20g, %.20g\n", mpfr_get_d(frz, GMP_RNDN), mpfr_get_d(fry, GMP_RNDN)); printf("test = %.20g, %.20g\n", t2.x, t2.y); fflush(stdout); ecnt++; } } { t = xnextafterf(d, d2); double c = nextafterf(d, d2); if (!(isnan(t) && isnan(c)) && t != c) { printf("Pure C nextafterf arg=%.20g, %.20g\n", d, d2); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_exp(frx, 0); double u0 = countULPsp(t = 
xfrfrexpf(d), frx); if (d != 0 && isnumber(d) && u0 != 0) { printf("Pure C frfrexpf arg=%.20g ulp=%.20g\n", d, u0); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int cexp = mpfr_get_exp(frx); int texp = xexpfrexpf(d); if (d != 0 && isnumber(d) && cexp != texp) { printf("Pure C expfrexpf arg=%.20g\n", d); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2sp(t = xhypotf_u05(d, d2), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 > 0.5001) { printf("Pure C hypotf_u05 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_hypot(frx, frx, fry, GMP_RNDN); double u0 = countULP2sp(t = xhypotf_u35(d, d2), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (u0 >= 3.5) { printf("Pure C hypotf_u35 arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_fmod(frx, frx, fry, GMP_RNDN); double u0 = countULPsp(t = xfmodf(d, d2), frx); long double c = mpfr_get_ld(frx, GMP_RNDN); if (fabs((double)d / d2) < 1e+38 && u0 > 0.5) { printf("Pure C fmodf arg=%.20g, %.20g ulp=%.20g\n", d, d2, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_set_d(fry, d2, GMP_RNDN); mpfr_set_d(frz, d3, GMP_RNDN); mpfr_fma(frx, frx, fry, frz, GMP_RNDN); double u0 = countULP2sp(t = xfmaf(d, d2, d3), frx); double c = mpfr_get_d(frx, GMP_RNDN); if ((-1e+34 < c && c < 1e+33 && u0 > 0.5001) || !(u0 <= 0.5001 || isinf(t))) { printf("Pure C fmaf arg=%.20g, %.20g, %.20g ulp=%.20g\n", d, d2, d3, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { 
mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xsqrtf_u05(d), frx); if (u0 > 0.5001) { printf("Pure C sqrtf_u05 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_sqrt(frx, frx, GMP_RNDN); double u0 = countULPsp(t = xsqrtf_u35(d), frx); if (u0 > 3.5) { printf("Pure C sqrtf_u35 arg=%.20g ulp=%.20g\n", d, u0); printf("correct = %.20g, test = %.20g\n", mpfr_get_d(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erfc(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = xerfcf_u15(d), frx); if (u0 > 1.5) { printf("Pure C erfcf arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_erf(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = xerff_u1(d), frx); if (u0 > 1.0) { printf("Pure C erff arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); int s; mpfr_lgamma(frx, &s, frx, GMP_RNDN); double u0 = countULPsp(t = xlgammaf_u1(d), frx); if (((d < 0 && fabsl(t - mpfr_get_ld(frx, GMP_RNDN)) > 1e-8 && u0 > 1) || (0 <= d && d < 4e+36 && u0 > 1) || (4e+36 <= d && !(u0 <= 1 || isinf(t))))) { printf("Pure C xlgammaf arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); printf("Diff = %.20Lg\n", fabsl(t - mpfr_get_ld(frx, GMP_RNDN))); fflush(stdout); ecnt++; } } { mpfr_set_d(frx, d, GMP_RNDN); mpfr_gamma(frx, frx, GMP_RNDN); double u0 = countULP2sp(t = xtgammaf_u1(d), frx); double c = mpfr_get_d(frx, GMP_RNDN); if (isnumber(c) || isnumber(t)) { if (u0 > 1.0) { printf("Pure C xtgamma arg=%.20g ulp=%.20g\n", d, u0); printf("Correct = %.20Lg, test = %.20g\n", mpfr_get_ld(frx, GMP_RNDN), t); fflush(stdout); ecnt++; } } } 
} exit(0); } sleef-3.3.1/src/libm-tester/testerutil.c000066400000000000000000000220021333715643700202120ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include #include #include #if defined(POWER64_UNDEF_USE_EXTERN_INLINES) // This is a workaround required to cross compile for PPC64 binaries #include #ifdef __USE_EXTERN_INLINES #undef __USE_EXTERN_INLINES #endif #endif #include #if defined(__MINGW32__) || defined(__MINGW64__) || defined(_MSC_VER) #define STDIN_FILENO 0 #else #include #include #include #endif #if defined(__MINGW32__) || defined(__MINGW64__) #include #endif #include "misc.h" #define DENORMAL_DBL_MIN (4.9406564584124654418e-324) #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) #define DENORMAL_FLT_MIN (1.4012984643248170709e-45f) #define POSITIVE_INFINITYf ((float)INFINITY) #define NEGATIVE_INFINITYf (-(float)INFINITY) int isnumber(double x) { return !isinf(x) && !isnan(x); } int isPlusZero(double x) { return x == 0 && copysign(1, x) == 1; } int isMinusZero(double x) { return x == 0 && copysign(1, x) == -1; } double sign(double d) { return d < 0 ? -1 : 1; } int xisnan(double x) { return x != x; } int isnumberf(float x) { return !isinff(x) && !isnanf(x); } int isPlusZerof(float x) { return x == 0 && copysignf(1, x) == 1; } int isMinusZerof(float x) { return x == 0 && copysignf(1, x) == -1; } float signf(float d) { return d < 0 ? 
-1 : 1; } int xisnanf(float x) { return x != x; } int enableFlushToZero = 0; double flushToZero(double y) { if (enableFlushToZero && fabs(y) < FLT_MIN) y = copysign(0.0, y); return y; } // double u2d(uint64_t u) { union { double f; uint64_t i; } tmp; tmp.i = u; return tmp.f; } uint64_t d2u(double d) { union { double f; uint64_t i; } tmp; tmp.f = d; return tmp.i; } float u2f(uint32_t u) { union { float f; uint32_t i; } tmp; tmp.i = u; return tmp.f; } uint32_t f2u(float d) { union { float f; uint32_t i; } tmp; tmp.f = d; return tmp.i; } // int readln(int fd, char *buf, int cnt) { int i, rcnt = 0; if (cnt < 1) return -1; while(cnt >= 2) { i = read(fd, buf, 1); if (i != 1) return i; if (*buf == '\n') break; rcnt++; buf++; cnt--; } *++buf = '\0'; rcnt++; return rcnt; } int startsWith(char *str, char *prefix) { return strncmp(str, prefix, strlen(prefix)) == 0; } // #ifdef USEMPFR #include int cmpDenormsp(float x, mpfr_t fry) { float y = mpfr_get_d(fry, GMP_RNDN); x = flushToZero(x); y = flushToZero(y); if (xisnanf(x) && xisnanf(y)) return 1; if (xisnanf(x) || xisnanf(y)) return 0; if (isinf(x) != isinf(y)) return 0; if (x == POSITIVE_INFINITYf && y == POSITIVE_INFINITYf) return 1; if (x == NEGATIVE_INFINITYf && y == NEGATIVE_INFINITYf) return 1; if (y == 0) { if (isPlusZerof(x) && isPlusZerof(y)) return 1; if (isMinusZerof(x) && isMinusZerof(y)) return 1; return 0; } if (!xisnanf(x) && !xisnanf(y) && !isinf(x) && !isinf(y)) return signf(x) == signf(y); return 0; } int cmpDenormdp(double x, mpfr_t fry) { double y = mpfr_get_d(fry, GMP_RNDN); if (xisnan(x) && xisnan(y)) return 1; if (xisnan(x) || xisnan(y)) return 0; if (isinf(x) != isinf(y)) return 0; if (x == POSITIVE_INFINITY && y == POSITIVE_INFINITY) return 1; if (x == NEGATIVE_INFINITY && y == NEGATIVE_INFINITY) return 1; if (y == 0) { if (isPlusZero(x) && isPlusZero(y)) return 1; if (isMinusZero(x) && isMinusZero(y)) return 1; return 0; } if (!xisnan(x) && !xisnan(y) && !isinf(x) && !isinf(y)) return sign(x) == 
sign(y); return 0; } double countULPdp(double d, mpfr_t c) { mpfr_t fra, frb, frc, frd; mpfr_inits(fra, frb, frc, frd, NULL); double c2 = mpfr_get_d(c, GMP_RNDN); if (c2 == 0 && d != 0) { mpfr_clears(fra, frb, frc, frd, NULL); return 10000; } if (isnan(c2) && isnan(d)) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } if (isnan(c2) || isnan(d)) { mpfr_clears(fra, frb, frc, frd, NULL); return 10001; } if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } double v = 0; if (isinf(d) && !isinf(mpfr_get_d(c, GMP_RNDN))) { d = copysign(DBL_MAX, c2); v = 1; } // int e; frexp(mpfr_get_d(c, GMP_RNDN), &e); mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-53), DENORMAL_DBL_MIN), GMP_RNDN); mpfr_set_d(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); mpfr_clears(fra, frb, frc, frd, NULL); return u + v; } double countULP2dp(double d, mpfr_t c) { mpfr_t fra, frb, frc, frd; mpfr_inits(fra, frb, frc, frd, NULL); double c2 = mpfr_get_d(c, GMP_RNDN); if (c2 == 0 && d != 0) { mpfr_clears(fra, frb, frc, frd, NULL); return 10000; } if (isnan(c2) && isnan(d)) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } if (isnan(c2) || isnan(d)) { mpfr_clears(fra, frb, frc, frd, NULL); return 10001; } if (c2 == POSITIVE_INFINITY && d == POSITIVE_INFINITY) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } if (c2 == NEGATIVE_INFINITY && d == NEGATIVE_INFINITY) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } double v = 0; if (isinf(d) && !isinf(mpfr_get_d(c, GMP_RNDN))) { d = copysign(DBL_MAX, c2); v = 1; } // int e; frexp(mpfr_get_d(c, GMP_RNDN), &e); mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-53), DBL_MIN), GMP_RNDN); mpfr_set_d(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); 
mpfr_clears(fra, frb, frc, frd, NULL); return u + v; } double countULPsp(float d, mpfr_t c) { mpfr_t fra, frb, frc, frd; mpfr_inits(fra, frb, frc, frd, NULL); d = flushToZero(d); float c2 = flushToZero(mpfr_get_d(c, GMP_RNDN)); if (c2 == 0 && d != 0) { mpfr_clears(fra, frb, frc, frd, NULL); return 10000; } if (isnan(c2) && isnan(d)) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } if (isnan(c2) || isnan(d)) { mpfr_clears(fra, frb, frc, frd, NULL); return 10001; } if (c2 == POSITIVE_INFINITYf && d == POSITIVE_INFINITYf) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } if (c2 == NEGATIVE_INFINITYf && d == NEGATIVE_INFINITYf) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } double v = 0; if (isinf(d) && !isinf(mpfr_get_d(c, GMP_RNDN))) { d = copysign(FLT_MAX, c2); v = 1; } // int e; frexp(mpfr_get_d(c, GMP_RNDN), &e); mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-24), DENORMAL_FLT_MIN), GMP_RNDN); mpfr_set_d(frd, d, GMP_RNDN); mpfr_sub(fra, frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); mpfr_clears(fra, frb, frc, frd, NULL); return u + v; } double countULP2sp(float d, mpfr_t c) { mpfr_t fra, frb, frc, frd; mpfr_inits(fra, frb, frc, frd, NULL); d = flushToZero(d); float c2 = flushToZero(mpfr_get_d(c, GMP_RNDN)); if (c2 == 0 && d != 0) { mpfr_clears(fra, frb, frc, frd, NULL); return 10000; } if (isnan(c2) && isnan(d)) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } if (isnan(c2) || isnan(d)) { mpfr_clears(fra, frb, frc, frd, NULL); return 10001; } if (c2 == POSITIVE_INFINITYf && d == POSITIVE_INFINITYf) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } if (c2 == NEGATIVE_INFINITYf && d == NEGATIVE_INFINITYf) { mpfr_clears(fra, frb, frc, frd, NULL); return 0; } double v = 0; if (isinf(d) && !isinf(mpfr_get_d(c, GMP_RNDN))) { d = copysign(FLT_MAX, c2); v = 1; } // int e; frexp(mpfr_get_d(c, GMP_RNDN), &e); mpfr_set_ld(frb, fmaxl(ldexpl(1.0, e-24), FLT_MIN), GMP_RNDN); mpfr_set_d(frd, d, GMP_RNDN); mpfr_sub(fra, 
frd, c, GMP_RNDN); mpfr_div(fra, fra, frb, GMP_RNDN); double u = fabs(mpfr_get_d(fra, GMP_RNDN)); mpfr_clears(fra, frb, frc, frd, NULL); return u + v; } // void mpfr_sinpi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_mul(frd, frpi, arg, GMP_RNDN); mpfr_sin(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } void mpfr_cospi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd) { mpfr_t frpi, frd; mpfr_inits(frpi, frd, NULL); mpfr_const_pi(frpi, GMP_RNDN); mpfr_set_d(frd, 1.0, GMP_RNDN); mpfr_mul(frpi, frpi, frd, GMP_RNDN); mpfr_mul(frd, frpi, arg, GMP_RNDN); mpfr_cos(ret, frd, GMP_RNDN); mpfr_clears(frpi, frd, NULL); } void mpfr_lgamma_nosign(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd) { int s; mpfr_lgamma(ret, &s, arg, rnd); } #endif // #define USEMPFR sleef-3.3.1/src/libm-tester/testerutil.h000066400000000000000000000026251333715643700202300ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define DENORMAL_DBL_MIN (4.9406564584124654418e-324) #define POSITIVE_INFINITY INFINITY #define NEGATIVE_INFINITY (-INFINITY) #define DENORMAL_FLT_MIN (1.4012984643248170709e-45f) #define POSITIVE_INFINITYf ((float)INFINITY) #define NEGATIVE_INFINITYf (-(float)INFINITY) #define M_PIf ((float)M_PI) int enableFlushToZero; double flushToZero(double y); int isnumber(double x); int isPlusZero(double x); int isMinusZero(double x); int xisnan(double x); double sign(double d); int isnumberf(float x); int isPlusZerof(float x); int isMinusZerof(float x); int xisnanf(float x); float signf(float d); double u2d(uint64_t u); uint64_t d2u(double d); float u2f(uint32_t u); uint32_t f2u(float d); int readln(int fd, char *buf, int cnt); int startsWith(char *str, char *prefix); #ifdef USEMPFR int cmpDenormdp(double x, mpfr_t fry); double countULPdp(double d, mpfr_t c); double countULP2dp(double d, mpfr_t c); int cmpDenormsp(float x, mpfr_t fry); double countULPsp(float d, mpfr_t c); double countULP2sp(float d, mpfr_t c); void mpfr_sinpi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd); void mpfr_cospi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd); void mpfr_lgamma_nosign(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd); #endif sleef-3.3.1/src/libm/000077500000000000000000000000001333715643700143425ustar00rootroot00000000000000sleef-3.3.1/src/libm/CMakeLists.txt000066400000000000000000000434371333715643700171150ustar00rootroot00000000000000file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/) include_directories(${CMAKE_CURRENT_BINARY_DIR}/include/) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) set(CMAKE_C_FLAGS ORG_CMAKE_C_FLAGS) string(CONCAT CMAKE_C_FLAGS ${SLEEF_C_FLAGS}) set(EXT_ENABLE_ALIAS AVX512F ADVSIMD NEON32 VSX) # -------------------------------------------------------------------- # sleef.h # -------------------------------------------------------------------- # File generated for the headers 
# sleef.h is assembled from a fixed header part, one mkrename-generated section
# per entry of SLEEF_HEADER_LIST, and a fixed footer part.
set(SLEEF_ORG_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/sleeflibm_header.h.org)
set(SLEEF_ORG_FOOTER ${CMAKE_CURRENT_SOURCE_DIR}/sleeflibm_footer.h.org)
set(SLEEF_INCLUDE_HEADER ${CMAKE_BINARY_DIR}/include/sleef.h)

# The COMMAND lines are collected in a list first and handed to a single
# add_custom_command() afterwards.
set(SLEEF_HEADER_COMMANDS "")
list(APPEND SLEEF_HEADER_COMMANDS
  COMMAND ${CMAKE_COMMAND} -E copy ${SLEEF_ORG_HEADER} ${SLEEF_INCLUDE_HEADER})
foreach(SIMD ${SLEEF_HEADER_LIST})
  list(APPEND SLEEF_HEADER_COMMANDS
    COMMAND echo Generating sleef.h: ${TARGET_MKRENAME} ${HEADER_PARAMS_${SIMD}})
  # Run the mkrename helper and append its output to sleef.h. The executable is
  # referenced through $<TARGET_FILE:...>; this copy of the file had only a
  # bare "$" left in its place.
  list(APPEND SLEEF_HEADER_COMMANDS
    COMMAND $<TARGET_FILE:${TARGET_MKRENAME}> ${HEADER_PARAMS_${SIMD}} >> ${SLEEF_INCLUDE_HEADER})
endforeach()

# Append the footer; "type" needs backslash-separated paths on Windows.
if(MSVC)
  string(REPLACE "/" "\\" sleef_footer_input_file "${SLEEF_ORG_FOOTER}")
  list(APPEND SLEEF_HEADER_COMMANDS
    COMMAND type ${sleef_footer_input_file} >> ${SLEEF_INCLUDE_HEADER})
else()
  list(APPEND SLEEF_HEADER_COMMANDS
    COMMAND cat ${SLEEF_ORG_FOOTER} >> ${SLEEF_INCLUDE_HEADER})
endif()

add_custom_command(OUTPUT ${SLEEF_INCLUDE_HEADER}
  ${SLEEF_HEADER_COMMANDS}
  DEPENDS
    ${SLEEF_ORG_HEADER}
    ${SLEEF_ORG_FOOTER}
    ${TARGET_MKRENAME}
  )

# --------------------------------------------------------------------
# TARGET_MKRENAME
# renameXXX.h for each SIMD
# --------------------------------------------------------------------
# Helper executable: generates parts of the sleef header file
add_host_executable(${TARGET_MKRENAME} mkrename.c)

set(HEADER_FILES_GENERATED "")
foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
  if(COMPILER_SUPPORTS_${SIMD})
    # Need lowercase string for rename header
    string(TOLOWER ${SIMD} vecarch)
    # OBJECT_<SIMD> and HEADER_<SIMD> are read again when the object libraries
    # for each extension are created.
    set(OBJECT_${SIMD} "sleef${vecarch}")
    set(HEADER_${SIMD} ${CMAKE_CURRENT_BINARY_DIR}/include/rename${vecarch}.h)
    list(APPEND HEADER_FILES_GENERATED ${HEADER_${SIMD}})

    # Generate mkrename commands
    add_custom_command(OUTPUT ${HEADER_${SIMD}}
      COMMAND echo Generating rename${vecarch}.h: ${TARGET_MKRENAME} ${RENAME_PARAMS_${SIMD}}
      COMMAND $<TARGET_FILE:${TARGET_MKRENAME}> ${RENAME_PARAMS_${SIMD}} > ${HEADER_${SIMD}}
      DEPENDS ${TARGET_MKRENAME}
      )
    add_custom_target(rename${SIMD}.h_generated DEPENDS ${HEADER_${SIMD}})
  endif()
endforeach()

# --------------------------------------------------------------------
# TARGET_MKRENAME_GNUABI
# renameXXX_gnuabi.h for each SIMD GNU Abi
# --------------------------------------------------------------------
# Helper executable: generates parts of the sleef header file gnu_abi
add_host_executable(${TARGET_MKRENAME_GNUABI} mkrename_gnuabi.c)

set(HEADER_GNUABI_FILES_GENERATED "")
if(ENABLE_GNUABI)
  foreach(SIMD ${SLEEF_SUPPORTED_GNUABI_EXTENSIONS})
    if(COMPILER_SUPPORTS_${SIMD})
      string(TOLOWER ${SIMD} vecarch)
      set(OBJECT_${SIMD}_dp_GNUABI "sleefgnuabi${vecarch}dp")
      set(OBJECT_${SIMD}_sp_GNUABI "sleefgnuabi${vecarch}sp")
      set(HEADER_${SIMD}_GNUABI ${CMAKE_CURRENT_BINARY_DIR}/include/rename${vecarch}_gnuabi.h)
      list(APPEND HEADER_GNUABI_FILES_GENERATED ${HEADER_${SIMD}_GNUABI})

      # Generate mkrename_gnuabi commands
      add_custom_command(OUTPUT ${HEADER_${SIMD}_GNUABI}
        COMMAND echo Generating rename${vecarch}_gnuabi.h: ${TARGET_MKRENAME_GNUABI} ${RENAME_PARAMS_GNUABI_${SIMD}}
        COMMAND $<TARGET_FILE:${TARGET_MKRENAME_GNUABI}> ${RENAME_PARAMS_GNUABI_${SIMD}} > ${HEADER_${SIMD}_GNUABI}
        DEPENDS ${TARGET_MKRENAME_GNUABI}
        )
      # set_source_files_properties(${HEADER_${SIMD}_GNUABI} PROPERTIES GENERATED TRUE)
    endif()
  endforeach()
endif()

# --------------------------------------------------------------------
# TARGET_MKMASKED_GNUABI
add_host_executable(${TARGET_MKMASKED_GNUABI} mkmasked_gnuabi.c)

# maskedXXX_YY_gnuabi.h
# One header per precision (dp, sp), produced only for extensions that define
# MKMASKED_PARAMS_GNUABI_<SIMD>_sp.
if(ENABLE_GNUABI)
  foreach(SIMD ${SLEEF_SUPPORTED_GNUABI_EXTENSIONS})
    if(COMPILER_SUPPORTS_${SIMD} AND MKMASKED_PARAMS_GNUABI_${SIMD}_sp)
      string(TOLOWER ${SIMD} vecarch)

      set(HEADER_GENERATED "")
      foreach(T dp sp)
        set(HEADER_MASKED_${SIMD}_${T}_GNUABI ${CMAKE_CURRENT_BINARY_DIR}/include/masked_${vecarch}_${T}_gnuabi.h)
        list(APPEND HEADER_GENERATED ${HEADER_MASKED_${SIMD}_${T}_GNUABI})

        add_custom_command(OUTPUT ${HEADER_MASKED_${SIMD}_${T}_GNUABI}
          COMMAND echo Generating ${HEADER_MASKED_${SIMD}_${T}_GNUABI}
          COMMAND $<TARGET_FILE:${TARGET_MKMASKED_GNUABI}> ${MKMASKED_PARAMS_GNUABI_${SIMD}_${T}} > ${HEADER_MASKED_${SIMD}_${T}_GNUABI}
          DEPENDS ${TARGET_MKMASKED_GNUABI}
          )
      endforeach()
      add_custom_target(masked${SIMD}_generated DEPENDS ${HEADER_GENERATED})
    endif()
  endforeach()
endif()

# --------------------------------------------------------------------
# TARGET_HEADERS
# --------------------------------------------------------------------
add_custom_target(${TARGET_HEADERS} ALL
  DEPENDS
    ${SLEEF_INCLUDE_HEADER}            # Output only
    ${HEADER_FILES_GENERATED}          # Output only
    ${HEADER_GNUABI_FILES_GENERATED}   # Output only
  )

# --------------------------------------------------------------------
# TARGET_MKALIAS
# --------------------------------------------------------------------
add_host_executable(${TARGET_MKALIAS} mkalias.c)

# --------------------------------------------------------------------
# TARGET_MKDISP
# --------------------------------------------------------------------
# Helper executable: dispatcher for the vector extensions
add_host_executable(${TARGET_MKDISP} mkdisp.c)

# Set C standard requirement (-std=gnu99 for gcc)
set_target_properties(
  ${TARGET_MKRENAME}
  ${TARGET_MKRENAME_GNUABI}
  ${TARGET_MKDISP}
  ${TARGET_MKALIAS}
  ${TARGET_MKMASKED_GNUABI}
  PROPERTIES C_STANDARD 99
  )

# --------------------------------------------------------------------
# TARGET_LIBSLEEF
# --------------------------------------------------------------------
# Build main library

# Properties applied to the main library and to every object library below.
set(COMMON_TARGET_PROPERTIES
  C_STANDARD 99                  # -std=gnu99
  )

if (BUILD_SHARED_LIBS)
  list(APPEND COMMON_TARGET_PROPERTIES POSITION_INDEPENDENT_CODE ON)   # -fPIC
endif()

# Original sleef sources
set(STANDARD_SOURCES sleefdp.c sleefsp.c rempitab.c)

# Check for different precision support and add sources accordingly
if(COMPILER_SUPPORTS_LONG_DOUBLE)
  list(APPEND STANDARD_SOURCES sleefld.c)
endif()

add_library(${TARGET_LIBSLEEF} ${STANDARD_SOURCES})
set_target_properties(${TARGET_LIBSLEEF} PROPERTIES
  VERSION ${SLEEF_VERSION}
  SOVERSION ${SLEEF_SOVERSION}
  ${COMMON_TARGET_PROPERTIES}
  )
target_compile_definitions(${TARGET_LIBSLEEF}
  PRIVATE DORENAME=1 ${COMMON_TARGET_DEFINITIONS}
  )

if(COMPILER_SUPPORTS_FLOAT128)
  # TODO: Not supported for LLVM bitcode gen as it has a specific compilation flags
  target_sources(${TARGET_LIBSLEEF} PRIVATE sleefqp.c)
  target_compile_definitions(${TARGET_LIBSLEEF}
    PRIVATE ENABLEFLOAT128=1 ${COMMON_TARGET_DEFINITIONS})
endif()

if(COMPILER_SUPPORTS_BUILTIN_MATH)
  target_compile_definitions(${TARGET_LIBSLEEF} PRIVATE ENABLE_BUILTIN_MATH=1)
endif()

# Compile SIMD versions
# Single precision and double precision
set(SIMD_SOURCES sleefsimdsp.c sleefsimddp.c)

# Processors whose name matches "arm" get the single-precision source only.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
  set(SIMD_SOURCES sleefsimdsp.c)
endif()

# Include symbols for each SIMD architecture (if supported by the platform)
# Note: adds object file as sources via cmake conditional generator expression
foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS})
  if(COMPILER_SUPPORTS_${SIMD})
    # INDEX_ALIAS is -1 when SIMD is not listed in EXT_ENABLE_ALIAS.
    list(FIND EXT_ENABLE_ALIAS ${SIMD} INDEX_ALIAS)
    string(TOLOWER ${SIMD} SIMDLC)

    # Create a library
    add_library(${OBJECT_${SIMD}} OBJECT ${SIMD_SOURCES} ${HEADER_${SIMD}})

    if (INDEX_ALIAS EQUAL -1)
      target_compile_definitions(${OBJECT_${SIMD}} PRIVATE
        ENABLE_${SIMD}=1
        DORENAME=1
        ${COMMON_TARGET_DEFINITIONS}
        )
    else()
      # Generate alias_<ext>.h with the mkalias helper: the single-precision
      # part creates the file, the double-precision part is appended. The
      # executable is referenced through $<TARGET_FILE:...>; this copy of the
      # file had only a bare "$" left in its place.
      add_custom_command(
        OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include/alias_${SIMDLC}.h
        COMMENT "Generating alias_${SIMDLC}.h"
        COMMAND $<TARGET_FILE:${TARGET_MKALIAS}> ${ALIAS_PARAMS_${SIMD}_SP} > ${CMAKE_CURRENT_BINARY_DIR}/include/alias_${SIMDLC}.h
        COMMAND $<TARGET_FILE:${TARGET_MKALIAS}> ${ALIAS_PARAMS_${SIMD}_DP} >> ${CMAKE_CURRENT_BINARY_DIR}/include/alias_${SIMDLC}.h
        DEPENDS ${TARGET_MKALIAS}
        )
      add_custom_target(alias_${SIMDLC}.h_generated
        SOURCES ${CMAKE_CURRENT_BINARY_DIR}/include/alias_${SIMDLC}.h)
      add_dependencies(${OBJECT_${SIMD}} alias_${SIMDLC}.h_generated)
      target_compile_definitions(${OBJECT_${SIMD}} PRIVATE
        ENABLE_${SIMD}=1
        DORENAME=1
        ${COMMON_TARGET_DEFINITIONS}
        ALIAS_NO_EXT_SUFFIX=\"alias_${SIMDLC}.h\"
        )
    endif()

    # The rename header must exist before the object library is compiled.
    add_dependencies(${OBJECT_${SIMD}} rename${SIMD}.h_generated)
    set_target_properties(${OBJECT_${SIMD}} PROPERTIES
      ${COMMON_TARGET_PROPERTIES}
      )

    target_compile_options(${OBJECT_${SIMD}} PRIVATE
      ${FLAGS_ENABLE_${SIMD}})

    # Add the objects of the library created above to the main library.
    target_sources(${TARGET_LIBSLEEF} PRIVATE $<TARGET_OBJECTS:${OBJECT_${SIMD}}>)
  endif()
endforeach()

# On some systems we need to explicitly link libsleef against libm to
# use some of the math functions used in the scalar code (for example
# sqrt).
if(LIBM AND NOT COMPILER_SUPPORTS_BUILTIN_MATH)
  target_link_libraries(${TARGET_LIBSLEEF} ${LIBM})
endif()

# NOTE(review): the generator expression that followed "$" is missing from this
# copy of the file and nothing nearby identifies the target it referred to;
# the line is kept as found and must be restored from the upstream source.
target_sources(${TARGET_LIBSLEEF} PRIVATE $)

# --------------------------------------------------------------------

if (SLEEF_ARCH_X86)
  # Target dispsse.c
  # The generated file is a copy of dispsse.c.org followed by the output of
  # the mkdisp helper.
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/dispsse.c
    COMMENT "Generating dispsse.c"
    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/dispsse.c.org ${CMAKE_CURRENT_BINARY_DIR}/dispsse.c
    COMMAND $<TARGET_FILE:${TARGET_MKDISP}> 2 4 __m128d __m128 __m128i sse2 sse4 avx2128 >> ${CMAKE_CURRENT_BINARY_DIR}/dispsse.c
    DEPENDS ${TARGET_MKDISP}
    )
  add_custom_target(dispsse.c_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dispsse.c)

  # Target renamedsp128.h
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp128.h
    COMMENT "Generating renamedsp128.h"
    COMMAND $<TARGET_FILE:${TARGET_MKRENAME}> 2 4 > ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp128.h
    DEPENDS ${TARGET_MKRENAME}
    )
  add_custom_target(renamedsp128.h_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp128.h)

  # Target dispavx.c
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/dispavx.c
    COMMENT "Generating dispavx.c"
    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/dispavx.c.org ${CMAKE_CURRENT_BINARY_DIR}/dispavx.c
    COMMAND $<TARGET_FILE:${TARGET_MKDISP}> 4 8 __m256d __m256 __m128i avx fma4 avx2 >> ${CMAKE_CURRENT_BINARY_DIR}/dispavx.c
    DEPENDS ${TARGET_MKDISP}
    )
  add_custom_target(dispavx.c_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dispavx.c)

  # Target renamedsp256.h
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp256.h
    COMMENT "Generating renamedsp256.h"
    COMMAND $<TARGET_FILE:${TARGET_MKRENAME}> 4 8 > ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp256.h
    DEPENDS ${TARGET_MKRENAME}
    )
  add_custom_target(renamedsp256.h_generated SOURCES ${CMAKE_CURRENT_BINARY_DIR}/include/renamedsp256.h)

  # Target dispsse_obj

  # Definitions shared by both dispatcher object libraries.
  if (COMPILER_SUPPORTS_FMA4)
    set(DISPATCHER_DEFINITIONS ${DISPATCHER_DEFINITIONS} ENABLE_FMA4=1)
  endif()

  if (COMPILER_SUPPORTS_AVX2)
    set(DISPATCHER_DEFINITIONS ${DISPATCHER_DEFINITIONS} ENABLE_AVX2=1)
  endif()

  add_library(dispsse_obj OBJECT dispsse.c)
  target_compile_options(dispsse_obj PRIVATE ${FLAGS_ENABLE_SSE2})
  set_target_properties(dispsse_obj PROPERTIES ${COMMON_TARGET_PROPERTIES})
  target_compile_definitions(dispsse_obj PRIVATE ${COMMON_TARGET_DEFINITIONS} ${DISPATCHER_DEFINITIONS})
  target_include_directories(dispsse_obj PRIVATE ${CMAKE_BINARY_DIR}/include)
  add_dependencies(dispsse_obj dispsse.c_generated renamedsp128.h_generated ${TARGET_HEADERS})
  target_sources(${TARGET_LIBSLEEF} PRIVATE $<TARGET_OBJECTS:dispsse_obj>)

  # Target dispavx_obj

  add_library(dispavx_obj OBJECT dispavx.c)
  target_compile_options(dispavx_obj PRIVATE ${FLAGS_ENABLE_AVX})
  set_target_properties(dispavx_obj PROPERTIES ${COMMON_TARGET_PROPERTIES})
  target_compile_definitions(dispavx_obj PRIVATE ${COMMON_TARGET_DEFINITIONS} ${DISPATCHER_DEFINITIONS})
  target_include_directories(dispavx_obj PRIVATE ${CMAKE_BINARY_DIR}/include)
  add_dependencies(dispavx_obj dispavx.c_generated renamedsp256.h_generated ${TARGET_HEADERS})
  target_sources(${TARGET_LIBSLEEF} PRIVATE $<TARGET_OBJECTS:dispavx_obj>)
endif()

# --------------------------------------------------------------------
# TARGET_LIBSLEEFGNUABI
# Compile SIMD versions for GNU Abi
# --------------------------------------------------------------------
# Build gnuabi version from just simd object files
if(ENABLE_GNUABI)
  set(TARGET_LIBSLEEFGNUABI_OBJECTS "")
  foreach(SIMD ${SLEEF_SUPPORTED_GNUABI_EXTENSIONS})
    if(COMPILER_SUPPORTS_${SIMD})
      # Need lowercase string for rename header
      string(TOLOWER ${SIMD} vecarch)

      # One object library per precision, each built from sleefsimd<T>.c.
      foreach(T dp sp)
        add_library(${OBJECT_${SIMD}_${T}_GNUABI} OBJECT sleefsimd${T}.c ${HEADER_${SIMD}_GNUABI})
        target_compile_definitions(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE
          ENABLE_${SIMD}=1
          DORENAME=1
          ENABLE_GNUABI=1
          )
        set_target_properties(${OBJECT_${SIMD}_${T}_GNUABI} PROPERTIES
          ${COMMON_TARGET_PROPERTIES}
          )
        target_compile_options(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE
          ${FLAGS_ENABLE_${SIMD}})
        # NOTE(review): this passes the same ENABLE_GNUABI=1 definition that is
        # already set unconditionally above.
        if (COMPILER_SUPPORTS_WEAK_ALIASES)
          target_compile_options(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE -DENABLE_GNUABI=1)
        endif()
        # Collect the objects of the library created above for the final library.
        list(APPEND TARGET_LIBSLEEFGNUABI_OBJECTS $<TARGET_OBJECTS:${OBJECT_${SIMD}_${T}_GNUABI}>)

        if(MKMASKED_PARAMS_GNUABI_${SIMD}_${T})
          target_compile_definitions(${OBJECT_${SIMD}_${T}_GNUABI} PRIVATE
            HEADER_MASKED=\"masked_${vecarch}_${T}_gnuabi.h\")
          add_dependencies(${OBJECT_${SIMD}_${T}_GNUABI} masked${SIMD}_generated)
        endif()
      endforeach()
    endif()
  endforeach()

  # Create library
  add_library(${TARGET_LIBSLEEFGNUABI} ${TARGET_LIBSLEEFGNUABI_OBJECTS} rempitab.c)

  # Library properties
  set_target_properties(${TARGET_LIBSLEEFGNUABI} PROPERTIES
    VERSION ${SLEEF_VERSION_MAJOR}.${SLEEF_VERSION_MINOR}
    SOVERSION ${SLEEF_SOVERSION}
    POSITION_INDEPENDENT_CODE ON   # -fPIC
    C_STANDARD 99                  # -std=gnu99
    LINKER_LANGUAGE C
    )

  # On some systems we need to explicitly link libsleefgnuabi against
  # libm to use some of the math functions used in the scalar code (for
  # example sqrt).
if(LIBM AND NOT COMPILER_SUPPORTS_BUILTIN_MATH) target_link_libraries(${TARGET_LIBSLEEFGNUABI} ${LIBM}) endif() endif(ENABLE_GNUABI) # -------------------------------------------------------------------- # TARGET_LLVM_BITCODE # Generate LLVM bitcode # -------------------------------------------------------------------- if(CLANG_EXE_PATH AND SLEEF_ENABLE_LLVM_BITCODE) set(SLEEP_LLVM_BITCODE_INCLUDES "") get_property(SLEEP_LLVM_BITCODE_INCLUDES_LIST DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) foreach(INCLUDE_DIRECTORY ${SLEEP_LLVM_BITCODE_INCLUDES_LIST}) set(SLEEP_LLVM_BITCODE_INCLUDES "${SLEEP_LLVM_BITCODE_INCLUDES} -I ${INCLUDE_DIRECTORY}") endforeach() separate_arguments(SLEEP_LLVM_BITCODE_INCLUDES_CLANG WINDOWS_COMMAND "${SLEEP_LLVM_BITCODE_INCLUDES}") set(SLEEF_CLANG_LLVM_BITCODE_OPTIONS -O3 -S -emit-llvm -D NDEBUG -D DORENAME=1) set(LLVM_BITCODE_OUTPUTS "") # Generate LLVM bitcode for regular SLEEF foreach(STANDARD_SOURCE ${STANDARD_SOURCES}) get_filename_component(SRC_WITHOUT_EXT ${STANDARD_SOURCE} NAME_WE) set(LLVM_BITCODE_INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${SRC_WITHOUT_EXT}.c) set(LLVM_BITCODE_OUTPUT ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${SRC_WITHOUT_EXT}.ll) add_custom_command(OUTPUT ${LLVM_BITCODE_OUTPUT} COMMAND ${CLANG_EXE_PATH} ${SLEEF_CLANG_LLVM_BITCODE_OPTIONS} -o ${LLVM_BITCODE_OUTPUT} ${LLVM_BITCODE_INPUT} ${SLEEP_LLVM_BITCODE_INCLUDES_CLANG} DEPENDS ${LLVM_BITCODE_INPUT} ) list(APPEND LLVM_BITCODE_OUTPUTS ${LLVM_BITCODE_OUTPUT}) endforeach() # Generate LLVM bitcode for SIMD SLEEF foreach(SIMD ${SLEEF_SUPPORTED_EXTENSIONS}) if(COMPILER_SUPPORTS_${SIMD}) foreach(SIMD_SOURCE ${SIMD_SOURCES}) get_filename_component(SIMD_SOURCE_WITHOUT_EXT ${SIMD_SOURCE} NAME_WE) set(LLVM_BITCODE_INPUT ${CMAKE_CURRENT_SOURCE_DIR}/${SIMD_SOURCE}) set(LLVM_BITCODE_OUTPUT ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${SIMD_SOURCE_WITHOUT_EXT}_${SIMD}.ll) add_custom_command(OUTPUT ${LLVM_BITCODE_OUTPUT} COMMAND ${CLANG_EXE_PATH} ${CLANG_FLAGS_ENABLE_${SIMD}} 
${SLEEF_CLANG_LLVM_BITCODE_OPTIONS} -D ENABLE_${SIMD}=1 -o ${LLVM_BITCODE_OUTPUT} ${LLVM_BITCODE_INPUT} ${SLEEP_LLVM_BITCODE_INCLUDES_CLANG} DEPENDS ${LLVM_BITCODE_INPUT} ) list(APPEND LLVM_BITCODE_OUTPUTS ${LLVM_BITCODE_OUTPUT}) endforeach() endif() endforeach() file(MAKE_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) add_custom_target(${TARGET_LLVM_BITCODE} ALL DEPENDS ${LLVM_BITCODE_OUTPUTS} ) add_dependencies(${TARGET_LLVM_BITCODE} ${TARGET_HEADERS}) install(FILES ${LLVM_BITCODE_OUTPUTS} DESTINATION lib) endif() # -------------------------------------------------------------------- # Install # -------------------------------------------------------------------- # Install libsleef and sleef.h install(FILES ${SLEEF_INCLUDE_HEADER} DESTINATION include) install(TARGETS ${TARGET_LIBSLEEF} DESTINATION lib) if(ENABLE_GNUABI) install(TARGETS ${TARGET_LIBSLEEFGNUABI} DESTINATION lib) endif() sleef-3.3.1/src/libm/dd.h000066400000000000000000000274541333715643700151160ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Double-double helpers. A vdouble2 carries a value as a pair of vdouble
// members (x, y); the routines below treat x as the leading part and y as a
// correction term (ddnormalize and ddsqrt use x + y as the represented value).
// The order of floating point operations in this file is significant for
// rounding; do not re-associate or reorder them.

#ifdef ENABLE_SVE
typedef __sizeless_struct vdouble2 {
  svfloat64_t x;
  svfloat64_t y;
} vdouble2;
#else
typedef struct {
  vdouble x, y;
} vdouble2;
#endif

// Returns d with its bit pattern ANDed against vcast_vm_i_i(0xffffffff, 0xf8000000).
// Presumably this keeps only the upper bits of each lane, so that
// (d - vupper(d)) is the remaining low part -- confirm the argument order of
// vcast_vm_i_i. The non-FMA products below rely on this split.
static INLINE CONST vdouble vupper_vd_vd(vdouble d) {
  return vreinterpret_vd_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vcast_vm_i_i(0xffffffff, 0xf8000000)));
}

// Builds a vdouble2 from vector parts h (stored in x) and l (stored in y).
static INLINE CONST vdouble2 vcast_vd2_vd_vd(vdouble h, vdouble l) {
  vdouble2 ret = {h, l};
  return ret;
}

// Builds a vdouble2 from scalar parts, each converted with vcast_vd_d.
static INLINE CONST vdouble2 vcast_vd2_d_d(double h, double l) {
  vdouble2 ret = {vcast_vd_d(h), vcast_vd_d(l)};
  return ret;
}

// Member-wise select between x and y under mask m (via vsel_vd_vo_vd_vd).
static INLINE CONST vdouble2 vsel_vd2_vo_vd2_vd2(vopmask m, vdouble2 x, vdouble2 y) {
  vdouble2 r;
  r.x = vsel_vd_vo_vd_vd(m, x.x, y.x);
  r.y = vsel_vd_vo_vd_vd(m, x.y, y.y);
  return r;
}

// Member-wise select between the scalar pairs (x1, y1) and (x0, y0) under mask o.
static INLINE CONST vdouble2 vsel_vd2_vo_d_d_d_d(vopmask o, double x1, double y1, double x0, double y0) {
  vdouble2 r;
  r.x = vsel_vd_vo_d_d(o, x1, x0);
  r.y = vsel_vd_vo_d_d(o, y1, y0);
  return r;
}

// vadd_vd_Nvd: sum of N vectors, accumulated strictly left to right:
// ((v0 + v1) + v2) + ...
static INLINE CONST vdouble vadd_vd_3vd(vdouble v0, vdouble v1, vdouble v2) {
  return vadd_vd_vd_vd(vadd_vd_vd_vd(v0, v1), v2);
}

static INLINE CONST vdouble vadd_vd_4vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3) {
  return vadd_vd_3vd(vadd_vd_vd_vd(v0, v1), v2, v3);
}

static INLINE CONST vdouble vadd_vd_5vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) {
  return vadd_vd_4vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4);
}

static INLINE CONST vdouble vadd_vd_6vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5) {
  return vadd_vd_5vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4, v5);
}

static INLINE CONST vdouble vadd_vd_7vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5, vdouble v6) {
  return vadd_vd_6vd(vadd_vd_vd_vd(v0, v1), v2, v3, v4, v5, v6);
}

// vsub_vd_Nvd: v0 minus each following vector, strictly left to right:
// ((v0 - v1) - v2) - ...
static INLINE CONST vdouble vsub_vd_3vd(vdouble v0, vdouble v1, vdouble v2) {
  return vsub_vd_vd_vd(vsub_vd_vd_vd(v0, v1), v2);
}

static INLINE CONST vdouble vsub_vd_4vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3) {
  return vsub_vd_3vd(vsub_vd_vd_vd(v0, v1), v2, v3);
}

static INLINE CONST vdouble vsub_vd_5vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4) {
  return vsub_vd_4vd(vsub_vd_vd_vd(v0, v1), v2, v3, v4);
}

static INLINE CONST vdouble vsub_vd_6vd(vdouble v0, vdouble v1, vdouble v2, vdouble v3, vdouble v4, vdouble v5) {
  return vsub_vd_5vd(vsub_vd_vd_vd(v0, v1), v2, v3, v4, v5);
}

//

// Negates both members.
static INLINE CONST vdouble2 ddneg_vd2_vd2(vdouble2 x) {
  return vcast_vd2_vd_vd(vneg_vd_vd(x.x), vneg_vd_vd(x.y));
}

// Absolute value: x becomes |x.x|; y has its sign bit XORed with the sign bit
// of x.x (extracted by ANDing with the bit pattern of -0.0), i.e. y is negated
// exactly when x.x carries a set sign bit.
static INLINE CONST vdouble2 ddabs_vd2_vd2(vdouble2 x) {
  return vcast_vd2_vd_vd(vabs_vd_vd(x.x),
                         vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x.y),
                                                          vand_vm_vm_vm(vreinterpret_vm_vd(x.x),
                                                                        vreinterpret_vm_vd(vcast_vd_d(-0.0))))));
}

// Renormalizes t: s.x = t.x + t.y, s.y = (t.x - s.x) + t.y.
static INLINE CONST vdouble2 ddnormalize_vd2_vd2(vdouble2 t) {
  vdouble2 s;

  s.x = vadd_vd_vd_vd(t.x, t.y);
  s.y = vadd_vd_vd_vd(vsub_vd_vd_vd(t.x, s.x), t.y);

  return s;
}

// Multiplies both members by s.
static INLINE CONST vdouble2 ddscale_vd2_vd2_vd(vdouble2 d, vdouble s) {
  vdouble2 r = {vmul_vd_vd_vd(d.x, s), vmul_vd_vd_vd(d.y, s)};
  return r;
}

// x + y as a vdouble2, with the correction computed as (x - r.x) + y.
// NOTE(review): the vdouble2 variants of this short form are annotated
// "|x| >= |y|" below; presumably the same precondition applies here -- confirm.
static INLINE CONST vdouble2 ddadd_vd2_vd_vd(vdouble x, vdouble y) {
  vdouble2 r;

  r.x = vadd_vd_vd_vd(x, y);
  r.y = vadd_vd_vd_vd(vsub_vd_vd_vd(x, r.x), y);

  return r;
}

// x + y as a vdouble2 using the longer correction
// (x - (r.x - v)) + (y - v) with v = r.x - x.
static INLINE CONST vdouble2 ddadd2_vd2_vd_vd(vdouble x, vdouble y) {
  vdouble2 r;

  r.x = vadd_vd_vd_vd(x, y);
  vdouble v = vsub_vd_vd_vd(r.x, x);
  r.y = vadd_vd_vd_vd(vsub_vd_vd_vd(x, vsub_vd_vd_vd(r.x, v)), vsub_vd_vd_vd(y, v));

  return r;
}

// vdouble2 + vdouble, short form: correction is ((x.x - r.x) + y) + x.y.
static INLINE CONST vdouble2 ddadd_vd2_vd2_vd(vdouble2 x, vdouble y) {
  vdouble2 r;

  r.x = vadd_vd_vd_vd(x.x, y);
  r.y = vadd_vd_3vd(vsub_vd_vd_vd(x.x, r.x), y, x.y);

  return r;
}

// vdouble2 - vdouble, short form: correction is ((x.x - r.x) - y) + x.y.
static INLINE CONST vdouble2 ddsub_vd2_vd2_vd(vdouble2 x, vdouble y) {
  vdouble2 r;

  r.x = vsub_vd_vd_vd(x.x, y);
  r.y = vadd_vd_vd_vd(vsub_vd_vd_vd(vsub_vd_vd_vd(x.x, r.x), y), x.y);

  return r;
}

// vdouble2 + vdouble, long form (see ddadd2_vd2_vd_vd); x.y is added to the
// correction last.
static INLINE CONST vdouble2 ddadd2_vd2_vd2_vd(vdouble2 x, vdouble y) {
  vdouble2 r;

  r.x = vadd_vd_vd_vd(x.x, y);
  vdouble v = vsub_vd_vd_vd(r.x, x.x);
  r.y = vadd_vd_vd_vd(vsub_vd_vd_vd(x.x, vsub_vd_vd_vd(r.x, v)), vsub_vd_vd_vd(y, v));
  r.y = vadd_vd_vd_vd(r.y, x.y);

  return r;
}

// vdouble + vdouble2, short form: correction is ((x - r.x) + y.x) + y.y.
static INLINE CONST vdouble2 ddadd_vd2_vd_vd2(vdouble x, vdouble2 y) {
  vdouble2 r;

  r.x = vadd_vd_vd_vd(x, y.x);
  r.y = vadd_vd_3vd(vsub_vd_vd_vd(x, r.x), y.x, y.y);

  return r;
}

// vdouble + vdouble2, long form; y.y is added to the correction last.
static INLINE CONST vdouble2 ddadd2_vd2_vd_vd2(vdouble x, vdouble2 y) {
  vdouble2 r;

  r.x = vadd_vd_vd_vd(x, y.x);
  vdouble v = vsub_vd_vd_vd(r.x, x);
  r.y = vadd_vd_vd_vd(vadd_vd_vd_vd(vsub_vd_vd_vd(x, vsub_vd_vd_vd(r.x, v)), vsub_vd_vd_vd(y.x, v)), y.y);

  return r;
}

// vdouble2 + vdouble2, short form.
static INLINE CONST vdouble2 ddadd_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
  // |x| >= |y|

  vdouble2 r;

  r.x = vadd_vd_vd_vd(x.x, y.x);
  r.y = vadd_vd_4vd(vsub_vd_vd_vd(x.x, r.x), y.x, x.y, y.y);

  return r;
}

// vdouble2 + vdouble2, long form; (x.y + y.y) is added to the correction last.
static INLINE CONST vdouble2 ddadd2_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
  vdouble2 r;

  r.x = vadd_vd_vd_vd(x.x, y.x);
  vdouble v = vsub_vd_vd_vd(r.x, x.x);
  r.y = vadd_vd_vd_vd(vsub_vd_vd_vd(x.x, vsub_vd_vd_vd(r.x, v)), vsub_vd_vd_vd(y.x, v));
  r.y = vadd_vd_vd_vd(r.y, vadd_vd_vd_vd(x.y, y.y));

  return r;
}

// vdouble - vdouble as a vdouble2, short form.
static INLINE CONST vdouble2 ddsub_vd2_vd_vd(vdouble x, vdouble y) {
  // |x| >= |y|

  vdouble2 r;

  r.x = vsub_vd_vd_vd(x, y);
  r.y = vsub_vd_vd_vd(vsub_vd_vd_vd(x, r.x), y);

  return r;
}

// vdouble2 - vdouble2, short form. The correction is built in four separate
// steps so that the evaluation order is fixed.
static INLINE CONST vdouble2 ddsub_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
  // |x| >= |y|

  vdouble2 r;

  r.x = vsub_vd_vd_vd(x.x, y.x);
  r.y = vsub_vd_vd_vd(x.x, r.x);
  r.y = vsub_vd_vd_vd(r.y, y.x);
  r.y = vadd_vd_vd_vd(r.y, x.y);
  r.y = vsub_vd_vd_vd(r.y, y.y);

  return r;
}

#ifdef ENABLE_FMA_DP
// FMA variants. The correction terms come from the fused helpers
// vfma / vfmapn / vfmanp, which are defined outside this file; presumably
// vfmapn(a, b, c) is a*b - c and vfmanp(a, b, c) is c - a*b -- confirm
// against the helper headers.

// n / d: q.x = n.x * (1 / d.x), with the correction accumulated in q.y.
static INLINE CONST vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) {
  vdouble2 q;
  vdouble t = vrec_vd_vd(d.x), u;

  q.x = vmul_vd_vd_vd(n.x, t);
  u = vfmapn_vd_vd_vd_vd(t, n.x, q.x);
  q.y = vfmanp_vd_vd_vd_vd(d.y, t, vfmanp_vd_vd_vd_vd(d.x, t, vcast_vd_d(1)));
  q.y = vfma_vd_vd_vd_vd(q.x, q.y, vfma_vd_vd_vd_vd(n.y, t, u));

  return q;
}

// x * y as a vdouble2.
static INLINE CONST vdouble2 ddmul_vd2_vd_vd(vdouble x, vdouble y) {
  vdouble2 r;

  r.x = vmul_vd_vd_vd(x, y);
  r.y = vfmapn_vd_vd_vd_vd(x, y, r.x);

  return r;
}

// x * x as a vdouble2.
static INLINE CONST vdouble2 ddsqu_vd2_vd2(vdouble2 x) {
  vdouble2 r;

  r.x = vmul_vd_vd_vd(x.x, x.x);
  r.y = vfma_vd_vd_vd_vd(vadd_vd_vd_vd(x.x, x.x), x.y, vfmapn_vd_vd_vd_vd(x.x, x.x, r.x));

  return r;
}

// x * y as a vdouble2 (the x.y * y.y term is not included).
static INLINE CONST vdouble2 ddmul_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
  vdouble2 r;

  r.x = vmul_vd_vd_vd(x.x, y.x);
  r.y = vfma_vd_vd_vd_vd(x.x, y.y, vfma_vd_vd_vd_vd(x.y, y.x, vfmapn_vd_vd_vd_vd(x.x, y.x, r.x)));

  return r;
}

// x * y collapsed to a single vdouble.
static INLINE CONST vdouble ddmul_vd_vd2_vd2(vdouble2 x, vdouble2 y) {
  return vfma_vd_vd_vd_vd(x.x, y.x, vfma_vd_vd_vd_vd(x.y, y.x, vmul_vd_vd_vd(x.x, y.y)));
}

// x * x collapsed to a single vdouble.
static INLINE CONST vdouble ddsqu_vd_vd2(vdouble2 x) {
  return vfma_vd_vd_vd_vd(x.x, x.x, vadd_vd_vd_vd(vmul_vd_vd_vd(x.x, x.y), vmul_vd_vd_vd(x.x, x.y)));
}

// vdouble2 * vdouble as a vdouble2.
static INLINE CONST vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) {
  vdouble2 r;

  r.x = vmul_vd_vd_vd(x.x, y);
  r.y = vfma_vd_vd_vd_vd(x.y, y, vfmapn_vd_vd_vd_vd(x.x, y, r.x));

  return r;
}

// 1 / d as a vdouble2: q.x = vrec(d), q.y = q.x * vfmanp(d, q.x, 1).
static INLINE CONST vdouble2 ddrec_vd2_vd(vdouble d) {
  vdouble2 q;

  q.x = vrec_vd_vd(d);
  q.y = vmul_vd_vd_vd(q.x, vfmanp_vd_vd_vd_vd(d, q.x, vcast_vd_d(1)));

  return q;
}

// 1 / d for a vdouble2 argument; d.y contributes through a second vfmanp.
static INLINE CONST vdouble2 ddrec_vd2_vd2(vdouble2 d) {
  vdouble2 q;

  q.x = vrec_vd_vd(d.x);
  q.y = vmul_vd_vd_vd(q.x, vfmanp_vd_vd_vd_vd(d.y, q.x, vfmanp_vd_vd_vd_vd(d.x, q.x, vcast_vd_d(1))));

  return q;
}
#else
// Non-FMA variants. Operands are split with vupper_vd_vd into a high part and
// the remainder (xh + xl), and the partial products are accumulated left to
// right by vadd_vd_Nvd / vsub_vd_Nvd. The argument order fixes the rounding
// sequence; do not reorder.

// n / d: q.x = n.x * (1 / d.x), with the correction assembled from split
// partial products.
static INLINE CONST vdouble2 dddiv_vd2_vd2_vd2(vdouble2 n, vdouble2 d) {
  vdouble t = vrec_vd_vd(d.x);
  vdouble dh  = vupper_vd_vd(d.x), dl  = vsub_vd_vd_vd(d.x,  dh);
  vdouble th  = vupper_vd_vd(t  ), tl  = vsub_vd_vd_vd(t  ,  th);
  vdouble nhh = vupper_vd_vd(n.x), nhl = vsub_vd_vd_vd(n.x, nhh);

  vdouble2 q;

  q.x = vmul_vd_vd_vd(n.x, t);

  vdouble u = vadd_vd_5vd(vsub_vd_vd_vd(vmul_vd_vd_vd(nhh, th), q.x), vmul_vd_vd_vd(nhh, tl), vmul_vd_vd_vd(nhl, th), vmul_vd_vd_vd(nhl, tl),
                          vmul_vd_vd_vd(q.x, vsub_vd_5vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl))));

  q.y = vmla_vd_vd_vd_vd(t, vsub_vd_vd_vd(n.y, vmul_vd_vd_vd(q.x, d.y)), u);

  return q;
}

// x * y as a vdouble2.
static INLINE CONST vdouble2 ddmul_vd2_vd_vd(vdouble x, vdouble y) {
  vdouble xh = vupper_vd_vd(x), xl = vsub_vd_vd_vd(x, xh);
  vdouble yh = vupper_vd_vd(y), yl = vsub_vd_vd_vd(y, yh);
  vdouble2 r;

  r.x = vmul_vd_vd_vd(x, y);
  r.y = vadd_vd_5vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(r.x), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl));

  return r;
}

// vdouble2 * vdouble as a vdouble2.
static INLINE CONST vdouble2 ddmul_vd2_vd2_vd(vdouble2 x, vdouble y) {
  vdouble xh = vupper_vd_vd(x.x), xl = vsub_vd_vd_vd(x.x, xh);
  vdouble yh = vupper_vd_vd(y  ), yl = vsub_vd_vd_vd(y  , yh);
  vdouble2 r;

  r.x = vmul_vd_vd_vd(x.x, y);
  r.y = vadd_vd_6vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(r.x), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(x.y, y));

  return r;
}

// x * y as a vdouble2 (the x.y * y.y term is not included).
static INLINE CONST vdouble2 ddmul_vd2_vd2_vd2(vdouble2 x, vdouble2 y) {
  vdouble xh = vupper_vd_vd(x.x), xl = vsub_vd_vd_vd(x.x, xh);
  vdouble yh = vupper_vd_vd(y.x), yl = vsub_vd_vd_vd(y.x, yh);
  vdouble2 r;

  r.x = vmul_vd_vd_vd(x.x, y.x);
  r.y = vadd_vd_7vd(vmul_vd_vd_vd(xh, yh), vneg_vd_vd(r.x), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(x.x, y.y), vmul_vd_vd_vd(x.y, y.x));

  return r;
}

// x * y collapsed to a single vdouble; the terms are summed starting with the
// ones involving the y members and ending with xh * yh.
static INLINE CONST vdouble ddmul_vd_vd2_vd2(vdouble2 x, vdouble2 y) {
  vdouble xh = vupper_vd_vd(x.x), xl = vsub_vd_vd_vd(x.x, xh);
  vdouble yh = vupper_vd_vd(y.x), yl = vsub_vd_vd_vd(y.x, yh);

  return vadd_vd_6vd(vmul_vd_vd_vd(x.y, yh), vmul_vd_vd_vd(xh, y.y), vmul_vd_vd_vd(xl, yl), vmul_vd_vd_vd(xh, yl), vmul_vd_vd_vd(xl, yh), vmul_vd_vd_vd(xh, yh));
}

// x * x as a vdouble2.
static INLINE CONST vdouble2 ddsqu_vd2_vd2(vdouble2 x) {
  vdouble xh = vupper_vd_vd(x.x), xl = vsub_vd_vd_vd(x.x, xh);
  vdouble2 r;

  r.x = vmul_vd_vd_vd(x.x, x.x);
  r.y = vadd_vd_5vd(vmul_vd_vd_vd(xh, xh), vneg_vd_vd(r.x), vmul_vd_vd_vd(vadd_vd_vd_vd(xh, xh), xl), vmul_vd_vd_vd(xl, xl), vmul_vd_vd_vd(x.x, vadd_vd_vd_vd(x.y, x.y)));

  return r;
}

// x * x collapsed to a single vdouble.
static INLINE CONST vdouble ddsqu_vd_vd2(vdouble2 x) {
  vdouble xh = vupper_vd_vd(x.x), xl = vsub_vd_vd_vd(x.x, xh);

  return vadd_vd_5vd(vmul_vd_vd_vd(xh, x.y), vmul_vd_vd_vd(xh, x.y), vmul_vd_vd_vd(xl, xl), vadd_vd_vd_vd(vmul_vd_vd_vd(xh, xl), vmul_vd_vd_vd(xh, xl)), vmul_vd_vd_vd(xh, xh));
}

// 1 / d as a vdouble2: q.x = vrec(d); q.y = q.x * (1 - d * q.x), with the
// product d * q.x expanded from the split operands.
static INLINE CONST vdouble2 ddrec_vd2_vd(vdouble d) {
  vdouble t = vrec_vd_vd(d);
  vdouble dh = vupper_vd_vd(d), dl = vsub_vd_vd_vd(d, dh);
  vdouble th = vupper_vd_vd(t), tl = vsub_vd_vd_vd(t, th);
  vdouble2 q;

  q.x = t;
  q.y = vmul_vd_vd_vd(t, vsub_vd_5vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl)));

  return q;
}

// 1 / d for a vdouble2 argument; d.y * t is subtracted as the last term.
static INLINE CONST vdouble2 ddrec_vd2_vd2(vdouble2 d) {
  vdouble t = vrec_vd_vd(d.x);
  vdouble dh = vupper_vd_vd(d.x), dl = vsub_vd_vd_vd(d.x, dh);
  vdouble th = vupper_vd_vd(t  ), tl = vsub_vd_vd_vd(t  , th);
  vdouble2 q;

  q.x = t;
  q.y = vmul_vd_vd_vd(t, vsub_vd_6vd(vcast_vd_d(1), vmul_vd_vd_vd(dh, th), vmul_vd_vd_vd(dh, tl), vmul_vd_vd_vd(dl, th), vmul_vd_vd_vd(dl, tl), vmul_vd_vd_vd(d.y, t)));

  return q;
}
#endif

// Square root of a vdouble2: starts from t = sqrt(d.x + d.y) and returns
// (d + t*t) * (1/t) * 0.5, evaluated with the vdouble2 routines above.
static INLINE CONST vdouble2 ddsqrt_vd2_vd2(vdouble2 d) {
  vdouble t = vsqrt_vd_vd(vadd_vd_vd_vd(d.x, d.y));
  return ddscale_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd2(d, ddmul_vd2_vd_vd(t, t)), ddrec_vd2_vd(t)), vcast_vd_d(0.5));
}

// Square root of a plain vdouble, returned as a vdouble2; same refinement as
// ddsqrt_vd2_vd2 with t = sqrt(d).
static INLINE CONST vdouble2 ddsqrt_vd2_vd(vdouble d) {
  vdouble t = vsqrt_vd_vd(d);
  return ddscale_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd_vd2(d, ddmul_vd2_vd_vd(t, t)), ddrec_vd2_vd(t)), vcast_vd_d(0.5));
}
sleef-3.3.1/src/libm/df.h000066400000000000000000000321451333715643700151110ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifdef ENABLE_SVE typedef __sizeless_struct vfloat2 { svfloat32_t x; svfloat32_t y; } vfloat2; #else typedef struct { vfloat x, y; } vfloat2; #endif static INLINE CONST vfloat vupper_vf_vf(vfloat d) { return vreinterpret_vf_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0xfffff000))); } static INLINE CONST vfloat2 vcast_vf2_vf_vf(vfloat h, vfloat l) { vfloat2 ret = {h, l}; return ret; } static INLINE CONST vfloat2 vcast_vf2_f_f(float h, float l) { vfloat2 ret = {vcast_vf_f(h), vcast_vf_f(l)}; return ret; } static INLINE CONST vfloat2 vcast_vf2_d(double d) { vfloat2 ret = {vcast_vf_f(d), vcast_vf_f(d - (float)d)}; return ret; } static INLINE CONST vfloat2 vsel_vf2_vo_vf2_vf2(vopmask m, vfloat2 x, vfloat2 y) { vfloat2 r; r.x = vsel_vf_vo_vf_vf(m, x.x, y.x); r.y = vsel_vf_vo_vf_vf(m, x.y, y.y); return r; } static INLINE CONST vfloat2 vsel_vf2_vo_f_f_f_f(vopmask o, float x1, float y1, float x0, float y0) { vfloat2 r; r.x = vsel_vf_vo_f_f(o, x1, x0); r.y = vsel_vf_vo_f_f(o, y1, y0); return r; } static INLINE CONST vfloat2 vsel_vf2_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) { return vsel_vf2_vo_vf2_vf2(o0, vcast_vf2_d(d0), vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_d(d1), vcast_vf2_d(d2))); } static INLINE CONST vfloat2 vsel_vf2_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) { return vsel_vf2_vo_vf2_vf2(o0, vcast_vf2_d(d0), vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_d(d1), vsel_vf2_vo_vf2_vf2(o2, vcast_vf2_d(d2), vcast_vf2_d(d3)))); } static INLINE CONST vfloat2 vabs_vf2_vf2(vfloat2 x) { return vcast_vf2_vf_vf(vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0)), vreinterpret_vm_vf(x.x)), vreinterpret_vm_vf(x.x))), vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0)), vreinterpret_vm_vf(x.x)), vreinterpret_vm_vf(x.y)))); } static INLINE CONST vfloat 
vadd_vf_3vf(vfloat v0, vfloat v1, vfloat v2) { return vadd_vf_vf_vf(vadd_vf_vf_vf(v0, v1), v2); } static INLINE CONST vfloat vadd_vf_4vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3) { return vadd_vf_3vf(vadd_vf_vf_vf(v0, v1), v2, v3); } static INLINE CONST vfloat vadd_vf_5vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4) { return vadd_vf_4vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4); } static INLINE CONST vfloat vadd_vf_6vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4, vfloat v5) { return vadd_vf_5vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4, v5); } static INLINE CONST vfloat vadd_vf_7vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4, vfloat v5, vfloat v6) { return vadd_vf_6vf(vadd_vf_vf_vf(v0, v1), v2, v3, v4, v5, v6); } static INLINE CONST vfloat vsub_vf_3vf(vfloat v0, vfloat v1, vfloat v2) { return vsub_vf_vf_vf(vsub_vf_vf_vf(v0, v1), v2); } static INLINE CONST vfloat vsub_vf_4vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3) { return vsub_vf_3vf(vsub_vf_vf_vf(v0, v1), v2, v3); } static INLINE CONST vfloat vsub_vf_5vf(vfloat v0, vfloat v1, vfloat v2, vfloat v3, vfloat v4) { return vsub_vf_4vf(vsub_vf_vf_vf(v0, v1), v2, v3, v4); } // static INLINE CONST vfloat2 dfneg_vf2_vf2(vfloat2 x) { return vcast_vf2_vf_vf(vneg_vf_vf(x.x), vneg_vf_vf(x.y)); } static INLINE CONST vfloat2 dfabs_vf2_vf2(vfloat2 x) { return vcast_vf2_vf_vf(vabs_vf_vf(x.x), vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x.y), vand_vm_vm_vm(vreinterpret_vm_vf(x.x), vreinterpret_vm_vf(vcast_vf_f(-0.0f)))))); } static INLINE CONST vfloat2 dfnormalize_vf2_vf2(vfloat2 t) { vfloat2 s; s.x = vadd_vf_vf_vf(t.x, t.y); s.y = vadd_vf_vf_vf(vsub_vf_vf_vf(t.x, s.x), t.y); return s; } static INLINE CONST vfloat2 dfscale_vf2_vf2_vf(vfloat2 d, vfloat s) { vfloat2 r = {vmul_vf_vf_vf(d.x, s), vmul_vf_vf_vf(d.y, s)}; return r; } static INLINE CONST vfloat2 dfadd_vf2_vf_vf(vfloat x, vfloat y) { vfloat2 r; r.x = vadd_vf_vf_vf(x, y); r.y = vadd_vf_vf_vf(vsub_vf_vf_vf(x, r.x), y); return r; } static INLINE 
CONST vfloat2 dfadd2_vf2_vf_vf(vfloat x, vfloat y) { vfloat2 r; r.x = vadd_vf_vf_vf(x, y); vfloat v = vsub_vf_vf_vf(r.x, x); r.y = vadd_vf_vf_vf(vsub_vf_vf_vf(x, vsub_vf_vf_vf(r.x, v)), vsub_vf_vf_vf(y, v)); return r; } static INLINE CONST vfloat2 dfadd2_vf2_vf_vf2(vfloat x, vfloat2 y) { vfloat2 r; r.x = vadd_vf_vf_vf(x, y.x); vfloat v = vsub_vf_vf_vf(r.x, x); r.y = vadd_vf_vf_vf(vadd_vf_vf_vf(vsub_vf_vf_vf(x, vsub_vf_vf_vf(r.x, v)), vsub_vf_vf_vf(y.x, v)), y.y); return r; } static INLINE CONST vfloat2 dfadd_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat2 r; r.x = vadd_vf_vf_vf(x.x, y); r.y = vadd_vf_3vf(vsub_vf_vf_vf(x.x, r.x), y, x.y); return r; } static INLINE CONST vfloat2 dfsub_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat2 r; r.x = vsub_vf_vf_vf(x.x, y); r.y = vadd_vf_vf_vf(vsub_vf_vf_vf(vsub_vf_vf_vf(x.x, r.x), y), x.y); return r; } static INLINE CONST vfloat2 dfadd2_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat2 r; r.x = vadd_vf_vf_vf(x.x, y); vfloat v = vsub_vf_vf_vf(r.x, x.x); r.y = vadd_vf_vf_vf(vsub_vf_vf_vf(x.x, vsub_vf_vf_vf(r.x, v)), vsub_vf_vf_vf(y, v)); r.y = vadd_vf_vf_vf(r.y, x.y); return r; } static INLINE CONST vfloat2 dfadd_vf2_vf_vf2(vfloat x, vfloat2 y) { vfloat2 r; r.x = vadd_vf_vf_vf(x, y.x); r.y = vadd_vf_3vf(vsub_vf_vf_vf(x, r.x), y.x, y.y); return r; } static INLINE CONST vfloat2 dfadd_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { // |x| >= |y| vfloat2 r; r.x = vadd_vf_vf_vf(x.x, y.x); r.y = vadd_vf_4vf(vsub_vf_vf_vf(x.x, r.x), y.x, x.y, y.y); return r; } static INLINE CONST vfloat2 dfadd2_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { vfloat2 r; r.x = vadd_vf_vf_vf(x.x, y.x); vfloat v = vsub_vf_vf_vf(r.x, x.x); r.y = vadd_vf_vf_vf(vsub_vf_vf_vf(x.x, vsub_vf_vf_vf(r.x, v)), vsub_vf_vf_vf(y.x, v)); r.y = vadd_vf_vf_vf(r.y, vadd_vf_vf_vf(x.y, y.y)); return r; } static INLINE CONST vfloat2 dfsub_vf2_vf_vf(vfloat x, vfloat y) { // |x| >= |y| vfloat2 r; r.x = vsub_vf_vf_vf(x, y); r.y = vsub_vf_vf_vf(vsub_vf_vf_vf(x, r.x), y); return r; } static INLINE CONST vfloat2 
dfsub_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { // |x| >= |y| vfloat2 r; r.x = vsub_vf_vf_vf(x.x, y.x); r.y = vsub_vf_vf_vf(x.x, r.x); r.y = vsub_vf_vf_vf(r.y, y.x); r.y = vadd_vf_vf_vf(r.y, x.y); r.y = vsub_vf_vf_vf(r.y, y.y); return r; } #ifdef ENABLE_FMA_SP static INLINE CONST vfloat2 dfdiv_vf2_vf2_vf2(vfloat2 n, vfloat2 d) { vfloat2 q; vfloat t = vrec_vf_vf(d.x), u; q.x = vmul_vf_vf_vf(n.x, t); u = vfmapn_vf_vf_vf_vf(t, n.x, q.x); q.y = vfmanp_vf_vf_vf_vf(d.y, t, vfmanp_vf_vf_vf_vf(d.x, t, vcast_vf_f(1))); q.y = vfma_vf_vf_vf_vf(q.x, q.y, vfma_vf_vf_vf_vf(n.y, t, u)); return q; } static INLINE CONST vfloat2 dfmul_vf2_vf_vf(vfloat x, vfloat y) { vfloat2 r; r.x = vmul_vf_vf_vf(x, y); r.y = vfmapn_vf_vf_vf_vf(x, y, r.x); return r; } static INLINE CONST vfloat2 dfsqu_vf2_vf2(vfloat2 x) { vfloat2 r; r.x = vmul_vf_vf_vf(x.x, x.x); r.y = vfma_vf_vf_vf_vf(vadd_vf_vf_vf(x.x, x.x), x.y, vfmapn_vf_vf_vf_vf(x.x, x.x, r.x)); return r; } static INLINE CONST vfloat dfsqu_vf_vf2(vfloat2 x) { return vfma_vf_vf_vf_vf(x.x, x.x, vadd_vf_vf_vf(vmul_vf_vf_vf(x.x, x.y), vmul_vf_vf_vf(x.x, x.y))); } static INLINE CONST vfloat2 dfmul_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { vfloat2 r; r.x = vmul_vf_vf_vf(x.x, y.x); r.y = vfma_vf_vf_vf_vf(x.x, y.y, vfma_vf_vf_vf_vf(x.y, y.x, vfmapn_vf_vf_vf_vf(x.x, y.x, r.x))); return r; } static INLINE CONST vfloat dfmul_vf_vf2_vf2(vfloat2 x, vfloat2 y) { return vfma_vf_vf_vf_vf(x.x, y.x, vfma_vf_vf_vf_vf(x.y, y.x, vmul_vf_vf_vf(x.x, y.y))); } static INLINE CONST vfloat2 dfmul_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat2 r; r.x = vmul_vf_vf_vf(x.x, y); r.y = vfma_vf_vf_vf_vf(x.y, y, vfmapn_vf_vf_vf_vf(x.x, y, r.x)); return r; } static INLINE CONST vfloat2 dfrec_vf2_vf(vfloat d) { vfloat2 q; q.x = vrec_vf_vf(d); q.y = vmul_vf_vf_vf(q.x, vfmanp_vf_vf_vf_vf(d, q.x, vcast_vf_f(1))); return q; } static INLINE CONST vfloat2 dfrec_vf2_vf2(vfloat2 d) { vfloat2 q; q.x = vrec_vf_vf(d.x); q.y = vmul_vf_vf_vf(q.x, vfmanp_vf_vf_vf_vf(d.y, q.x, vfmanp_vf_vf_vf_vf(d.x, q.x, 
vcast_vf_f(1)))); return q; } #else static INLINE CONST vfloat2 dfdiv_vf2_vf2_vf2(vfloat2 n, vfloat2 d) { vfloat t = vrec_vf_vf(d.x); vfloat dh = vupper_vf_vf(d.x), dl = vsub_vf_vf_vf(d.x, dh); vfloat th = vupper_vf_vf(t ), tl = vsub_vf_vf_vf(t , th); vfloat nhh = vupper_vf_vf(n.x), nhl = vsub_vf_vf_vf(n.x, nhh); vfloat2 q; q.x = vmul_vf_vf_vf(n.x, t); vfloat u, w; w = vcast_vf_f(-1); w = vmla_vf_vf_vf_vf(dh, th, w); w = vmla_vf_vf_vf_vf(dh, tl, w); w = vmla_vf_vf_vf_vf(dl, th, w); w = vmla_vf_vf_vf_vf(dl, tl, w); w = vneg_vf_vf(w); u = vmla_vf_vf_vf_vf(nhh, th, vneg_vf_vf(q.x)); u = vmla_vf_vf_vf_vf(nhh, tl, u); u = vmla_vf_vf_vf_vf(nhl, th, u); u = vmla_vf_vf_vf_vf(nhl, tl, u); u = vmla_vf_vf_vf_vf(q.x, w , u); q.y = vmla_vf_vf_vf_vf(t, vsub_vf_vf_vf(n.y, vmul_vf_vf_vf(q.x, d.y)), u); return q; } static INLINE CONST vfloat2 dfmul_vf2_vf_vf(vfloat x, vfloat y) { vfloat xh = vupper_vf_vf(x), xl = vsub_vf_vf_vf(x, xh); vfloat yh = vupper_vf_vf(y), yl = vsub_vf_vf_vf(y, yh); vfloat2 r; r.x = vmul_vf_vf_vf(x, y); vfloat t; t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(r.x)); t = vmla_vf_vf_vf_vf(xl, yh, t); t = vmla_vf_vf_vf_vf(xh, yl, t); t = vmla_vf_vf_vf_vf(xl, yl, t); r.y = t; return r; } static INLINE CONST vfloat2 dfmul_vf2_vf2_vf(vfloat2 x, vfloat y) { vfloat xh = vupper_vf_vf(x.x), xl = vsub_vf_vf_vf(x.x, xh); vfloat yh = vupper_vf_vf(y ), yl = vsub_vf_vf_vf(y , yh); vfloat2 r; r.x = vmul_vf_vf_vf(x.x, y); vfloat t; t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(r.x)); t = vmla_vf_vf_vf_vf(xl, yh, t); t = vmla_vf_vf_vf_vf(xh, yl, t); t = vmla_vf_vf_vf_vf(xl, yl, t); t = vmla_vf_vf_vf_vf(x.y, y, t); r.y = t; return r; } static INLINE CONST vfloat2 dfmul_vf2_vf2_vf2(vfloat2 x, vfloat2 y) { vfloat xh = vupper_vf_vf(x.x), xl = vsub_vf_vf_vf(x.x, xh); vfloat yh = vupper_vf_vf(y.x), yl = vsub_vf_vf_vf(y.x, yh); vfloat2 r; r.x = vmul_vf_vf_vf(x.x, y.x); vfloat t; t = vmla_vf_vf_vf_vf(xh, yh, vneg_vf_vf(r.x)); t = vmla_vf_vf_vf_vf(xl, yh, t); t = vmla_vf_vf_vf_vf(xh, yl, t); t = 
vmla_vf_vf_vf_vf(xl, yl, t); t = vmla_vf_vf_vf_vf(x.x, y.y, t); t = vmla_vf_vf_vf_vf(x.y, y.x, t); r.y = t; return r; } static INLINE CONST vfloat dfmul_vf_vf2_vf2(vfloat2 x, vfloat2 y) { vfloat xh = vupper_vf_vf(x.x), xl = vsub_vf_vf_vf(x.x, xh); vfloat yh = vupper_vf_vf(y.x), yl = vsub_vf_vf_vf(y.x, yh); return vadd_vf_6vf(vmul_vf_vf_vf(x.y, yh), vmul_vf_vf_vf(xh, y.y), vmul_vf_vf_vf(xl, yl), vmul_vf_vf_vf(xh, yl), vmul_vf_vf_vf(xl, yh), vmul_vf_vf_vf(xh, yh)); } static INLINE CONST vfloat2 dfsqu_vf2_vf2(vfloat2 x) { vfloat xh = vupper_vf_vf(x.x), xl = vsub_vf_vf_vf(x.x, xh); vfloat2 r; r.x = vmul_vf_vf_vf(x.x, x.x); vfloat t; t = vmla_vf_vf_vf_vf(xh, xh, vneg_vf_vf(r.x)); t = vmla_vf_vf_vf_vf(vadd_vf_vf_vf(xh, xh), xl, t); t = vmla_vf_vf_vf_vf(xl, xl, t); t = vmla_vf_vf_vf_vf(x.x, vadd_vf_vf_vf(x.y, x.y), t); r.y = t; return r; } static INLINE CONST vfloat dfsqu_vf_vf2(vfloat2 x) { vfloat xh = vupper_vf_vf(x.x), xl = vsub_vf_vf_vf(x.x, xh); return vadd_vf_5vf(vmul_vf_vf_vf(xh, x.y), vmul_vf_vf_vf(xh, x.y), vmul_vf_vf_vf(xl, xl), vadd_vf_vf_vf(vmul_vf_vf_vf(xh, xl), vmul_vf_vf_vf(xh, xl)), vmul_vf_vf_vf(xh, xh)); } static INLINE CONST vfloat2 dfrec_vf2_vf(vfloat d) { vfloat t = vrec_vf_vf(d); vfloat dh = vupper_vf_vf(d), dl = vsub_vf_vf_vf(d, dh); vfloat th = vupper_vf_vf(t), tl = vsub_vf_vf_vf(t, th); vfloat2 q; q.x = t; vfloat u = vcast_vf_f(-1); u = vmla_vf_vf_vf_vf(dh, th, u); u = vmla_vf_vf_vf_vf(dh, tl, u); u = vmla_vf_vf_vf_vf(dl, th, u); u = vmla_vf_vf_vf_vf(dl, tl, u); q.y = vmul_vf_vf_vf(vneg_vf_vf(t), u); return q; } static INLINE CONST vfloat2 dfrec_vf2_vf2(vfloat2 d) { vfloat t = vrec_vf_vf(d.x); vfloat dh = vupper_vf_vf(d.x), dl = vsub_vf_vf_vf(d.x, dh); vfloat th = vupper_vf_vf(t ), tl = vsub_vf_vf_vf(t , th); vfloat2 q; q.x = t; vfloat u = vcast_vf_f(-1); u = vmla_vf_vf_vf_vf(dh, th, u); u = vmla_vf_vf_vf_vf(dh, tl, u); u = vmla_vf_vf_vf_vf(dl, th, u); u = vmla_vf_vf_vf_vf(dl, tl, u); u = vmla_vf_vf_vf_vf(d.y, t, u); q.y = 
vmul_vf_vf_vf(vneg_vf_vf(t), u); return q; } #endif static INLINE CONST vfloat2 dfsqrt_vf2_vf2(vfloat2 d) { #ifdef ENABLE_RECSQRT_SP vfloat x = vrecsqrt_vf_vf(vadd_vf_vf_vf(d.x, d.y)); vfloat2 r = dfmul_vf2_vf2_vf(d, x); return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(r, dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(r, x), vcast_vf_f(-3.0))), vcast_vf_f(-0.5)); #else vfloat t = vsqrt_vf_vf(vadd_vf_vf_vf(d.x, d.y)); return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf2(d, dfmul_vf2_vf_vf(t, t)), dfrec_vf2_vf(t)), vcast_vf_f(0.5)); #endif } static INLINE CONST vfloat2 dfsqrt_vf2_vf(vfloat d) { vfloat t = vsqrt_vf_vf(d); return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf_vf2(d, dfmul_vf2_vf_vf(t, t)), dfrec_vf2_vf(t)), vcast_vf_f(0.5f)); } sleef-3.3.1/src/libm/dispavx.c.org000066400000000000000000000126351333715643700167610ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include "misc.h" #if defined(_MSC_VER) #include #else #include #endif #if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER) #define CONST const #else #define CONST #endif #define IMPORT_IS_EXPORT #include "sleef.h" static int cpuSupportsAVX2() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); ret = (reg[1] & (1 << 5)) != 0; } return ret; } static int cpuSupportsFMA() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); ret = (reg[2] & (1 << 12)) != 0; } return ret; } static int cpuSupportsFMA4() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 0x80000001, 0); ret = (reg[3] & (1 << 16)) != 0; } return ret; } #ifdef ENABLE_FMA4 #define SUBST_IF_FMA4(funcfma4) if (cpuSupportsFMA4()) p = funcfma4; #else #define SUBST_IF_FMA4(funcfma4) #endif #ifdef ENABLE_AVX2 #define SUBST_IF_AVX2(funcavx2) if 
(cpuSupportsAVX2() && cpuSupportsFMA()) p = funcavx2; #else #define SUBST_IF_AVX2(funcavx2) #endif #define DISPATCH_vf_vf(fptype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST fptype (*pfn)(fptype arg0); \ static CONST fptype dfn(fptype arg0) { \ fptype CONST (*p)(fptype arg0) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST fptype (*pfn)(fptype arg0) = dfn; \ EXPORT CONST fptype funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vf(fptype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST fptype (*pfn)(fptype arg0, fptype arg1); \ static CONST fptype dfn(fptype arg0, fptype arg1) { \ fptype CONST (*p)(fptype arg0, fptype arg1) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1); \ } \ static CONST fptype (*pfn)(fptype arg0, fptype arg1) = dfn; \ EXPORT CONST fptype funcName(fptype arg0, fptype arg1) { return (*pfn)(arg0, arg1); } #define DISPATCH_vf2_vf(fptype, fptype2, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST fptype2 (*pfn)(fptype arg0); \ static CONST fptype2 dfn(fptype arg0) { \ fptype2 CONST (*p)(fptype arg0) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST fptype2 (*pfn)(fptype arg0) = dfn; \ EXPORT CONST fptype2 funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vi(fptype, itype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST fptype (*pfn)(fptype arg0, itype arg1); \ static CONST fptype dfn(fptype arg0, itype arg1) { \ fptype CONST (*p)(fptype arg0, itype arg1) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1); \ } \ static CONST fptype (*pfn)(fptype arg0, itype arg1) = dfn; \ EXPORT CONST fptype funcName(fptype arg0, itype arg1) { return (*pfn)(arg0, arg1); } #define DISPATCH_vi_vf(fptype, itype, funcName, 
pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST itype (*pfn)(fptype arg0); \ static CONST itype dfn(fptype arg0) { \ itype CONST (*p)(fptype arg0) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST itype (*pfn)(fptype arg0) = dfn; \ EXPORT CONST itype funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vf_vf(fptype, funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST fptype (*pfn)(fptype arg0, fptype arg1, fptype arg2); \ static CONST fptype dfn(fptype arg0, fptype arg1, fptype arg2) { \ fptype CONST (*p)(fptype arg0, fptype arg1, fptype arg2) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1, arg2); \ } \ static CONST fptype (*pfn)(fptype arg0, fptype arg1, fptype arg2) = dfn; \ EXPORT CONST fptype funcName(fptype arg0, fptype arg1, fptype arg2) { return (*pfn)(arg0, arg1, arg2); } #define DISPATCH_i_i(funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST int (*pfn)(int arg0); \ static CONST int dfn(int arg0) { \ int CONST (*p)(int) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST int (*pfn)(int arg0) = dfn; \ EXPORT CONST int funcName(int arg0) { return (*pfn)(arg0); } #define DISPATCH_p_i(funcName, pfn, dfn, funcavx, funcfma4, funcavx2) \ static CONST void *(*pfn)(int arg0); \ static CONST void *dfn(int arg0) { \ CONST void *(*p)(int) = funcavx; \ SUBST_IF_FMA4(funcfma4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST void *(*pfn)(int arg0) = dfn; \ EXPORT CONST void *funcName(int arg0) { return (*pfn)(arg0); } // sleef-3.3.1/src/libm/dispsse.c.org000066400000000000000000000140551333715643700167530ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include "misc.h" #if defined(_MSC_VER) #include #else #include #endif #if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER) #define CONST const #else #define CONST #endif #define IMPORT_IS_EXPORT #include "sleef.h" static int cpuSupportsSSE4_1() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); ret = (reg[2] & (1 << 19)) != 0; } return ret; } static int cpuSupportsAVX2() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 7, 0); ret = (reg[1] & (1 << 5)) != 0; } return ret; } static int cpuSupportsFMA() { static int ret = -1; if (ret == -1) { int32_t reg[4]; Sleef_x86CpuID(reg, 1, 0); ret = (reg[2] & (1 << 12)) != 0; } return ret; } #define SUBST_IF_SSE4(funcsse4) if (cpuSupportsSSE4_1()) p = funcsse4; #ifdef ENABLE_AVX2 #define SUBST_IF_AVX2(funcavx2) if (cpuSupportsAVX2() && cpuSupportsFMA()) p = funcavx2; #else #define SUBST_IF_AVX2(funcavx2) #endif /* * DISPATCH_R_X, DISPATCH_R_X_Y and DISPATCH_R_X_Y_Z are the macro for * defining dispatchers. R, X, Y and Z represent the data types of * return value, first argument, second argument and third argument, * respectively. vf, vi, i and p correspond to vector FP, vector * integer, scalar integer and scalar pointer types, respectively. 
* * The arguments for the macros are as follows: * fptype : FP type name * funcname : Fundamental function name * pfn : Name of pointer of the function to the dispatcher * dfn : Name of the dispatcher function * funcsse2 : Name of the SSE2 function * funcsse4 : Name of the SSE4 function * funcavx2 : Name of the AVX2 function */ #define DISPATCH_vf_vf(fptype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST fptype (*pfn)(fptype arg0); \ static CONST fptype dfn(fptype arg0) { \ fptype CONST (*p)(fptype arg0) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST fptype (*pfn)(fptype arg0) = dfn; \ EXPORT CONST fptype funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vf(fptype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST fptype (*pfn)(fptype arg0, fptype arg1); \ static CONST fptype dfn(fptype arg0, fptype arg1) { \ fptype CONST (*p)(fptype arg0, fptype arg1) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1); \ } \ static CONST fptype (*pfn)(fptype arg0, fptype arg1) = dfn; \ EXPORT CONST fptype funcName(fptype arg0, fptype arg1) { return (*pfn)(arg0, arg1); } #define DISPATCH_vf2_vf(fptype, fptype2, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST fptype2 (*pfn)(fptype arg0); \ static CONST fptype2 dfn(fptype arg0) { \ fptype2 CONST (*p)(fptype arg0) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST fptype2 (*pfn)(fptype arg0) = dfn; \ EXPORT CONST fptype2 funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vi(fptype, itype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST fptype (*pfn)(fptype arg0, itype arg1); \ static CONST fptype dfn(fptype arg0, itype arg1) { \ fptype CONST (*p)(fptype arg0, itype arg1) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ 
pfn = p; \ return (*pfn)(arg0, arg1); \ } \ static CONST fptype (*pfn)(fptype arg0, itype arg1) = dfn; \ EXPORT CONST fptype funcName(fptype arg0, itype arg1) { return (*pfn)(arg0, arg1); } #define DISPATCH_vi_vf(fptype, itype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST itype (*pfn)(fptype arg0); \ static CONST itype dfn(fptype arg0) { \ itype CONST (*p)(fptype arg0) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST itype (*pfn)(fptype arg0) = dfn; \ EXPORT CONST itype funcName(fptype arg0) { return (*pfn)(arg0); } #define DISPATCH_vf_vf_vf_vf(fptype, funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST fptype (*pfn)(fptype arg0, fptype arg1, fptype arg2); \ static CONST fptype dfn(fptype arg0, fptype arg1, fptype arg2) { \ fptype CONST (*p)(fptype arg0, fptype arg1, fptype arg2) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0, arg1, arg2); \ } \ static CONST fptype (*pfn)(fptype arg0, fptype arg1, fptype arg2) = dfn; \ EXPORT CONST fptype funcName(fptype arg0, fptype arg1, fptype arg2) { return (*pfn)(arg0, arg1, arg2); } #define DISPATCH_i_i(funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST int (*pfn)(int arg0); \ static CONST int dfn(int arg0) { \ int CONST (*p)(int) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST int (*pfn)(int arg0) = dfn; \ EXPORT CONST int funcName(int arg0) { return (*pfn)(arg0); } #define DISPATCH_p_i(funcName, pfn, dfn, funcsse2, funcsse4, funcavx2) \ static CONST void *(*pfn)(int arg0); \ static CONST void *dfn(int arg0) { \ CONST void *(*p)(int) = funcsse2; \ SUBST_IF_SSE4(funcsse4); \ SUBST_IF_AVX2(funcavx2); \ pfn = p; \ return (*pfn)(arg0); \ } \ static CONST void *(*pfn)(int arg0) = dfn; \ EXPORT CONST void *funcName(int arg0) { return (*pfn)(arg0); } // 
sleef-3.3.1/src/libm/funcproto.h000066400000000000000000000054161333715643700165400ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) typedef struct { char *name; int ulp; int ulpSuffix; int funcType; int flags; } funcSpec; /* ulp : (error bound in ulp) * 10 ulpSuffix: 0 : "" 1 : "_u1" 2 : "_u05" 3 : "_u35" 4 : "_u15" funcType: 0 : vdouble func(vdouble); 1 : vdouble func(vdouble, vdouble); 2 : vdouble2 func(vdouble); GNUABI : void func(vdouble, double *, double *); 3 : vdouble func(vdouble, vint); 4 : vint func(vdouble); 5 : vdouble func(vdouble, vdouble, vdouble); 6 : vdouble2 func(vdouble); GNUABI : vdouble func(vdouble, double *); 7 : int func(int); 8 : void *func(int); flags: 1 : No GNUABI */ funcSpec funcList[] = { { "sin", 35, 0, 0, 0 }, { "cos", 35, 0, 0, 0 }, { "sincos", 35, 0, 2, 0 }, { "tan", 35, 0, 0, 0 }, { "asin", 35, 0, 0, 0 }, { "acos", 35, 0, 0, 0 }, { "atan", 35, 0, 0, 0 }, { "atan2", 35, 0, 1, 0 }, { "log", 35, 0, 0, 0 }, { "cbrt", 35, 0, 0, 0 }, { "sin", 10, 1, 0, 0 }, { "cos", 10, 1, 0, 0 }, { "sincos", 10, 1, 2, 0 }, { "tan", 10, 1, 0, 0 }, { "asin", 10, 1, 0, 0 }, { "acos", 10, 1, 0, 0 }, { "atan", 10, 1, 0, 0 }, { "atan2", 10, 1, 1, 0 }, { "log", 10, 1, 0, 0 }, { "cbrt", 10, 1, 0, 0 }, { "exp", 10, 0, 0, 0 }, { "pow", 10, 0, 1, 0 }, { "sinh", 10, 0, 0, 0 }, { "cosh", 10, 0, 0, 0 }, { "tanh", 10, 0, 0, 0 }, { "sinh", 35, 3, 0, 0 }, { "cosh", 35, 3, 0, 0 }, { "tanh", 35, 3, 0, 0 }, { "asinh", 10, 0, 0, 0 }, { "acosh", 10, 0, 0, 0 }, { "atanh", 10, 0, 0, 0 }, { "exp2", 10, 0, 0, 0 }, { "exp10", 10, 0, 0, 0 }, { "expm1", 10, 0, 0, 0 }, { "log10", 10, 0, 0, 0 }, { "log2", 10, 0, 0, 0 }, { "log1p", 10, 0, 0, 0 }, { "sincospi", 5, 2, 2, 0 }, { "sincospi", 35, 3, 2, 0 }, { "sinpi", 5, 2, 0, 0 }, { "cospi", 5, 2, 0, 0 }, { "ldexp", -1, 0, 3, 0 }, { "ilogb", -1, 0, 4, 0 }, { "fma", -1, 
0, 5, 0 }, { "sqrt", -1, 0, 0, 0 }, { "sqrt", 5, 2, 0, 1 }, { "sqrt", 35, 3, 0, 0 }, { "hypot", 5, 2, 1, 0 }, { "hypot", 35, 3, 1, 0 }, { "fabs", -1, 0, 0, 0 }, { "copysign", -1, 0, 1, 0 }, { "fmax", -1, 0, 1, 0 }, { "fmin", -1, 0, 1, 0 }, { "fdim", -1, 0, 1, 0 }, { "trunc", -1, 0, 0, 0 }, { "floor", -1, 0, 0, 0 }, { "ceil", -1, 0, 0, 0 }, { "round", -1, 0, 0, 0 }, { "rint", -1, 0, 0, 0 }, { "nextafter", -1, 0, 1, 0 }, { "frfrexp", -1, 0, 0, 0 }, { "expfrexp", -1, 0, 4, 0 }, { "fmod", -1, 0, 1, 0 }, { "modf", -1, 0, 6, 0 }, { "lgamma", 10, 1, 0, 0 }, { "tgamma", 10, 1, 0, 0 }, { "erf", 10, 1, 0, 0 }, { "erfc", 15, 4, 0, 0 }, { "getInt", -1, 0, 7, 1}, { "getPtr", -1, 0, 8, 1}, { NULL, -1, 0, 0, 0 }, }; sleef-3.3.1/src/libm/mkalias.c000066400000000000000000000104651333715643700161350ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "funcproto.h" int main(int argc, char **argv) { if (argc == 2 && strcmp(argv[1], "0") == 0) exit(0); if (argc < 6) { fprintf(stderr, "Usage : %s \n", argv[0]); exit(-1); } int vw = atoi(argv[1]); int fptype = vw >= 0 ? 0 : 1; vw = vw < 0 ? -vw : vw; char *mangledisa = argv[4]; char *isaname = argc == 6 ? argv[5] : ""; static char *argType2[] = { "a0", "a0, a1", "a0", "a0, a1", "a0", "a0, a1, a2", "a0", "a0", "a0" }; static char *typeSpecS[] = { "", "f" }; static char *typeSpec[] = { "d", "f" }; static char *ulpSuffixStr[] = { "", "_u1", "_u05", "_u35", "_u15" }; static char *vparameterStr[7] = { "v", "vv", "", "vv", "v", "vvv", "" }; static char returnType[9][1000]; static char argType0[9][1000]; static char argType1[9][1000]; sprintf(returnType[0], "%s", argv[2]); sprintf(returnType[1], "%s", argv[2]); sprintf(returnType[2], "%s", fptype ? 
"vfloat2" : "vdouble2"); sprintf(returnType[3], "%s", argv[2]); sprintf(returnType[4], "%s", argv[3]); sprintf(returnType[5], "%s", argv[2]); sprintf(returnType[6], "%s", fptype ? "vfloat2" : "vdouble2"); sprintf(returnType[7], "int"); sprintf(returnType[8], "void *"); sprintf(argType0[0], "%s", argv[2]); sprintf(argType0[1], "%s, %s", argv[2], argv[2]); sprintf(argType0[2], "%s", argv[2]); sprintf(argType0[3], "%s, %s", argv[2], argv[3]); sprintf(argType0[4], "%s", argv[2]); sprintf(argType0[5], "%s, %s, %s", argv[2], argv[2], argv[2]); sprintf(argType0[6], "%s", argv[2]); sprintf(argType0[7], "int"); sprintf(argType0[8], "int"); sprintf(argType1[0], "%s a0", argv[2]); sprintf(argType1[1], "%s a0, %s a1", argv[2], argv[2]); sprintf(argType1[2], "%s a0", argv[2]); sprintf(argType1[3], "%s a0, %s a1", argv[2], argv[3]); sprintf(argType1[4], "%s a0", argv[2]); sprintf(argType1[5], "%s a0, %s a1, %s a2", argv[2], argv[2], argv[2]); sprintf(argType1[6], "%s a0", argv[2]); sprintf(argType1[7], "int a0"); sprintf(argType1[8], "int a0"); // if (fptype == 0) { printf("#ifdef __SLEEFSIMDDP_C__\n"); } else { printf("#ifdef __SLEEFSIMDSP_C__\n"); } printf("#ifdef ENABLE_ALIAS\n"); if (argc == 6) { for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("EXPORT CONST %s Sleef_%s%s%d_u%02d(%s) __attribute__((alias(\"Sleef_%s%s%d_u%02d%s\")));\n", returnType[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, funcList[i].ulp, argType0[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, funcList[i].ulp, isaname ); } else { printf("EXPORT CONST %s Sleef_%s%s%d(%s) __attribute__((alias(\"Sleef_%s%s%d_%s\")));\n", returnType[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, argType0[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, isaname ); } } printf("\n"); } printf("#else // #ifdef ENABLE_ALIAS\n"); if (argc == 6) { for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("EXPORT CONST 
%s Sleef_%s%s%d_u%02d(%s) { return Sleef_%s%s%d_u%02d%s(%s); }\n", returnType[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, funcList[i].ulp, argType1[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, funcList[i].ulp, isaname, argType2[funcList[i].funcType] ); } else { printf("EXPORT CONST %s Sleef_%s%s%d(%s) { return Sleef_%s%s%d_%s(%s); }\n", returnType[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, argType1[funcList[i].funcType], funcList[i].name, typeSpec[fptype], vw, isaname, argType2[funcList[i].funcType] ); } } printf("\n"); } printf("#endif // #ifdef ENABLE_ALIAS\n"); if (fptype == 0) { printf("#endif // #ifdef __SLEEFSIMDDP_C__\n"); } else { printf("#endif // #ifdef __SLEEFSIMDSP_C__\n"); } exit(0); } sleef-3.3.1/src/libm/mkdisp.c000066400000000000000000000135541333715643700160050ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "funcproto.h" int main(int argc, char **argv) { if (argc < 7) { fprintf(stderr, "Usage : %s [ ...]\n", argv[0]); fprintf(stderr, "\n"); exit(-1); } const int wdp = atoi(argv[1]), wsp = atoi(argv[2]); const char *vdoublename = argv[3], *vfloatname = argv[4], *vintname = argv[5]; const int isastart = 6, nisa = argc - isastart; for(int i=0;funcList[i].name != NULL;i++) { char ulpSuffix0[100] = "", ulpSuffix1[100] = "_"; if (funcList[i].ulp >= 0) { sprintf(ulpSuffix0, "_u%02d", funcList[i].ulp); sprintf(ulpSuffix1, "_u%02d", funcList[i].ulp); } switch(funcList[i].funcType) { case 0: printf("DISPATCH_vf_vf(%s, Sleef_%sd%d%s, pnt_%sd%d%s, disp_%sd%d%s", vdoublename, funcList[i].name, wdp, ulpSuffix0, funcList[i].name, wdp, ulpSuffix0, funcList[i].name, wdp, ulpSuffix0); for(int j=0;j #include #include #include #include #include "funcproto.h" int main(int argc, char **argv) { if 
(argc < 4) { fprintf(stderr, "\nUsage : %s \n\n", argv[0]); fprintf(stderr, "This program generates an include file defining masked functions.\n"); exit(-1); } // const char *isaname = argv[1]; const char *mangledisa = argv[2]; const int vw = atoi(argv[3]); int fptype = 0; // Remove the "-" sign in the SP value const char *cvw = (vw < 0) ? argv[3] + 1 : argv[3]; if (vw < 0) { fptype = 1; } // VLA SVE does not set the vector length in the mangled names. if (strcmp(isaname, "sve") == 0) cvw = "x"; // #define LEN 256 static char *vfpname[] = { "vdouble", "vfloat" }; static char *vintname[] = { "vint", "vint2" }; static int sizeoffp[] = { 8, 4 }; static char *ulpSuffixStr[] = { "", "_u1", "_u05", "_u35", "_u15" }; static char vparameterStr[7][LEN] = { "v", "vv", "vl8l8", "vv", "v", "vvv", "vl8" }; static char *typeSpecS[] = { "", "f" }; static char *typeSpec[] = { "d", "f" }; static char funcname[4][LEN]; snprintf(vparameterStr[2], LEN, "vl%dl%d", sizeoffp[fptype], sizeoffp[fptype]); snprintf(vparameterStr[6], LEN, "vl%d", sizeoffp[fptype]); // for(int i=0;funcList[i].name != NULL;i++) { if ((funcList[i].flags & 1) != 0) continue; if (funcList[i].ulp < 20) { snprintf(funcname[0], LEN, "_ZGV%sN%s%s_%s%s", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype]); snprintf(funcname[1], LEN, "_ZGV%sM%s%s_%s%s", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype]); } else { snprintf(funcname[0], LEN, "_ZGV%sN%s%s_%s%s_u%d", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype], funcList[i].ulp); snprintf(funcname[1], LEN, "_ZGV%sM%s%s_%s%s_u%d", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype], funcList[i].ulp); } snprintf(funcname[2], LEN, "_ZGV%sN%s%s___%s%s_finite", mangledisa, cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype]); snprintf(funcname[3], LEN, "_ZGV%sM%s%s___%s%s_finite", mangledisa, 
cvw, vparameterStr[funcList[i].funcType], funcList[i].name, typeSpecS[fptype]); switch(funcList[i].funcType) { case 0: { printf("EXPORT CONST %s %s(%s a0, vopmask m) { return %s(a0); }\n", vfpname[fptype], funcname[1], vfpname[fptype], funcname[0]); if (funcList[i].ulp < 20) printf("EXPORT CONST %s %s(%s) __attribute__((weak, alias(\"%s\")));\n", vfpname[fptype], funcname[3], vfpname[fptype], funcname[0]); else printf("EXPORT CONST %s %s_u%d(%s) __attribute__((weak, alias(\"%s\")));\n", vfpname[fptype], funcname[3],funcList[i].ulp, vfpname[fptype], funcname[0]); break; } case 1: { printf("EXPORT CONST %s %s(%s a0, %s a1, vopmask m) { return %s(a0, a1); }\n", vfpname[fptype], funcname[1], vfpname[fptype], vfpname[fptype], funcname[0]); if (funcList[i].ulp < 20) printf("EXPORT CONST %s %s(%s, %s, vopmask) __attribute__((weak, alias(\"%s\")));\n", vfpname[fptype], funcname[3], vfpname[fptype], vfpname[fptype], funcname[0]); else printf("EXPORT CONST %s %s_u%d(%s, %s, vopmask) __attribute__((weak, alias(\"%s\")));\n", vfpname[fptype], funcname[3],funcList[i].ulp, vfpname[fptype], vfpname[fptype], funcname[0]); break; } case 2: if (sizeoffp[fptype] == sizeof(double)) { printf("#ifndef ENABLE_SVE\n"); printf("EXPORT void %s(vdouble a0, double *a1, double *a2, vopmask m) {\n", funcname[1]); printf(" double s[VECTLENDP], c[VECTLENDP];\n"); printf(" int32_t mbuf[VECTLENSP];\n"); printf(" %s(a0, s, c);\n", funcname[0]); printf(" vstoreu_v_p_vi2(mbuf, vcast_vi2_vm(vand_vm_vo64_vm(m, vcast_vm_i_i(-1, -1))));\n"); printf(" for(int i=0;i #include #include #include #include "funcproto.h" // In VSX intrinsics, vector data types are like "vector float". // This function replaces space characters with '_'. 
char *escapeSpace(char *str) { char *ret = malloc(strlen(str) + 10); strcpy(ret, str); for(char *p = ret;*p != '\0';p++) if (*p == ' ') *p = '_'; return ret; } int main(int argc, char **argv) { if (argc < 3) { fprintf(stderr, "Generate a header for renaming functions\n"); fprintf(stderr, "Usage : %s []\n", argv[0]); fprintf(stderr, "\n"); fprintf(stderr, "Generate a part of header for library functions\n"); fprintf(stderr, "Usage : %s []\n", argv[0]); fprintf(stderr, "\n"); exit(-1); } static char *ulpSuffixStr[] = { "", "_u1", "_u05", "_u35", "_u15" }; if (argc == 3 || argc == 4) { char *wdp = argv[1]; char *wsp = argv[2]; char *isaname = argc == 3 ? "" : argv[3]; char *isaub = argc == 4 ? "_" : ""; if (strcmp(isaname, "sve") == 0) wdp = wsp = "x"; for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("#define x%s%s Sleef_%sd%s_u%02d%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], funcList[i].name, wdp, funcList[i].ulp, isaname); } else { printf("#define x%s Sleef_%sd%s%s%s\n", funcList[i].name, funcList[i].name, wdp, isaub, isaname); } } printf("\n"); for(int i=0;funcList[i].name != NULL;i++) { if (funcList[i].ulp >= 0) { printf("#define x%sf%s Sleef_%sf%s_u%02d%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], funcList[i].name, wsp, funcList[i].ulp, isaname); } else { printf("#define x%sf Sleef_%sf%s%s%s\n", funcList[i].name, funcList[i].name, wsp, isaub, isaname); } } } else { char *wdp = argv[1]; char *wsp = argv[2]; char *vdoublename = argv[3], *vdoublename_escspace = escapeSpace(vdoublename); char *vfloatname = argv[4], *vfloatname_escspace = escapeSpace(vfloatname); char *vintname = argv[5], *vintname_escspace = escapeSpace(vintname); char *vint2name = argv[6], *vint2name_escspace = escapeSpace(vint2name); char *architecture = argv[7]; char *isaname = argc == 9 ? argv[8] : ""; char *isaub = argc == 9 ? 
"_" : ""; if (strcmp(isaname, "sve") == 0) wdp = wsp = "x"; printf("#ifdef %s\n", architecture); if (strcmp(architecture, "__ARM_FEATURE_SVE") == 0) printf("#define STRUCT_KEYWORD_%s __sizeless_struct\n", architecture); else printf("#define STRUCT_KEYWORD_%s struct\n", architecture); if (strcmp(vdoublename, "-") != 0) { printf("\n"); printf("#ifndef Sleef_%s_2_DEFINED\n", vdoublename_escspace); printf("typedef STRUCT_KEYWORD_%s {\n", architecture); printf(" %s x, y;\n", vdoublename); printf("} Sleef_%s_2;\n", vdoublename_escspace); printf("#define Sleef_%s_2_DEFINED\n", vdoublename_escspace); printf("#endif\n"); printf("\n"); for(int i=0;funcList[i].name != NULL;i++) { switch(funcList[i].funcType) { case 0: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s);\n", vdoublename, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s);\n", vdoublename, funcList[i].name, wdp, isaub, isaname, vdoublename); } break; case 1: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s, %s);\n", vdoublename, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vdoublename); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s, %s);\n", vdoublename, funcList[i].name, wdp, isaub, isaname, vdoublename, vdoublename); } break; case 2: case 6: if (funcList[i].ulp >= 0) { printf("IMPORT CONST Sleef_%s_2 Sleef_%sd%s_u%02d%s(%s);\n", vdoublename_escspace, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename); } else { printf("IMPORT CONST Sleef_%s_2 Sleef_%sd%s%s%s(%s);\n", vdoublename_escspace, funcList[i].name, wdp, isaub, isaname, vdoublename); } break; case 3: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s, %s);\n", vdoublename, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vintname); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s, %s);\n", vdoublename, funcList[i].name, wdp, isaub, isaname, vdoublename, vintname); } 
break; case 4: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s);\n", vintname, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s);\n", vintname, funcList[i].name, wdp, isaub, isaname, vdoublename); } break; case 5: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sd%s_u%02d%s(%s, %s, %s);\n", vdoublename, funcList[i].name, wdp, funcList[i].ulp, isaname, vdoublename, vdoublename, vdoublename); } else { printf("IMPORT CONST %s Sleef_%sd%s%s%s(%s, %s, %s);\n", vdoublename, funcList[i].name, wdp, isaub, isaname, vdoublename, vdoublename, vdoublename); } break; case 7: printf("IMPORT CONST int Sleef_%sd%s%s%s(int);\n", funcList[i].name, wdp, isaub, isaname); break; case 8: printf("IMPORT CONST void *Sleef_%sd%s%s%s(int);\n", funcList[i].name, wdp, isaub, isaname); break; } } } printf("\n"); printf("#ifndef Sleef_%s_2_DEFINED\n", vfloatname_escspace); printf("typedef STRUCT_KEYWORD_%s {\n", architecture); printf(" %s x, y;\n", vfloatname); printf("} Sleef_%s_2;\n", vfloatname_escspace); printf("#define Sleef_%s_2_DEFINED\n", vfloatname_escspace); printf("#endif\n"); printf("\n"); //printf("typedef %s vint2_%s;\n", vint2name, isaname); //printf("\n"); for(int i=0;funcList[i].name != NULL;i++) { switch(funcList[i].funcType) { case 0: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sf%s_u%02d%s(%s);\n", vfloatname, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname); } else { printf("IMPORT CONST %s Sleef_%sf%s%s%s(%s);\n", vfloatname, funcList[i].name, wsp, isaub, isaname, vfloatname); } break; case 1: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sf%s_u%02d%s(%s, %s);\n", vfloatname, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vfloatname); } else { printf("IMPORT CONST %s Sleef_%sf%s%s%s(%s, %s);\n", vfloatname, funcList[i].name, wsp, isaub, isaname, vfloatname, vfloatname); } break; case 2: case 6: if (funcList[i].ulp >= 
0) { printf("IMPORT CONST Sleef_%s_2 Sleef_%sf%s_u%02d%s(%s);\n", vfloatname_escspace, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname); } else { printf("IMPORT CONST Sleef_%s_2 Sleef_%sf%s%s%s(%s);\n", vfloatname_escspace, funcList[i].name, wsp, isaub, isaname, vfloatname); } break; /* case 3: printf("IMPORT CONST %s Sleef_%sf%d_%s(%s, vint2_%s);\n", vfloatname, funcList[i].name, wsp, isaname, vfloatname, isaname); break; case 4: printf("IMPORT CONST vint2_%s Sleef_%sf%d_%s(%s);\n", isaname, funcList[i].name, wsp, isaname, vfloatname); break; */ case 5: if (funcList[i].ulp >= 0) { printf("IMPORT CONST %s Sleef_%sf%s_u%02d%s(%s, %s, %s);\n", vfloatname, funcList[i].name, wsp, funcList[i].ulp, isaname, vfloatname, vfloatname, vfloatname); } else { printf("IMPORT CONST %s Sleef_%sf%s%s%s(%s, %s, %s);\n", vfloatname, funcList[i].name, wsp, isaub, isaname, vfloatname, vfloatname, vfloatname); } break; case 7: printf("IMPORT CONST int Sleef_%sf%s%s%s(int);\n", funcList[i].name, wsp, isaub, isaname); break; case 8: printf("IMPORT CONST void *Sleef_%sf%s%s%s(int);\n", funcList[i].name, wsp, isaub, isaname); break; } } printf("#endif\n"); free(vdoublename_escspace); free(vfloatname_escspace); free(vintname_escspace); free(vint2name_escspace); } exit(0); } sleef-3.3.1/src/libm/mkrename_gnuabi.c000066400000000000000000000113561333715643700176400ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #include #include #include #include "funcproto.h" int main(int argc, char **argv) { if (argc < 5) { fprintf(stderr, "Usage : %s \n", argv[0]); exit(-1); } char *isaname = argv[1]; char *mangledisa = argv[2]; char *wdp = argv[3]; char *wsp = argv[4]; // VLA SVE does not set the vector length in the mangled names. 
if (strcmp(isaname, "sve") == 0) wdp = wsp = "x"; static char *ulpSuffixStr[] = { "", "_u1", "_u05", "_u35", "_u15" }; static char *vparameterStrDP[] = { "v", "vv", "vl8l8", "vv", "v", "vvv", "vl8" }; static char *vparameterStrSP[] = { "v", "vv", "vl4l4", "vv", "v", "vvv", "vl4" }; for(int i=0;funcList[i].name != NULL;i++) { if ((funcList[i].flags & 1) != 0) continue; if (funcList[i].ulp < 0) { printf("#define x%s _ZGV%sN%s%s_%s\n", funcList[i].name, mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); printf("#define str_x%s \"_ZGV%sN%s%s_%s\"\n", funcList[i].name, mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); printf("#define __%s_finite _ZGV%sN%s%s___%s_finite\n", funcList[i].name, mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); } else if (funcList[i].ulp < 20) { printf("#define x%s%s _ZGV%sN%s%s_%s\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); printf("#define str_x%s%s \"_ZGV%sN%s%s_%s\"\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); printf("#define __%s%s_finite _ZGV%sN%s%s___%s_finite\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); } else { printf("#define x%s%s _ZGV%sN%s%s_%s_u%d\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name, funcList[i].ulp); printf("#define str_x%s%s \"_ZGV%sN%s%s_%s_u%d\"\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name, funcList[i].ulp); printf("#define __%s%s_finite _ZGV%sN%s%s___%s_finite\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wdp, vparameterStrDP[funcList[i].funcType], funcList[i].name); } } printf("\n"); 
for(int i=0;funcList[i].name != NULL;i++) { if ((funcList[i].flags & 1) != 0) continue; if (funcList[i].ulp < 0) { printf("#define x%sf _ZGV%sN%s%s_%sf\n", funcList[i].name, mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); printf("#define str_x%sf \"_ZGV%sN%s%s_%sf\"\n", funcList[i].name, mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); printf("#define __%sf_finite _ZGV%sN%s%s___%sf_finite\n", funcList[i].name, mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); } else if (funcList[i].ulp < 20) { printf("#define x%sf%s _ZGV%sN%s%s_%sf\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); printf("#define str_x%sf%s \"_ZGV%sN%s%s_%sf\"\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); printf("#define __%sf%s_finite _ZGV%sN%s%s___%sf_finite\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); } else { printf("#define x%sf%s _ZGV%sN%s%s_%sf_u%d\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name, funcList[i].ulp); printf("#define str_x%sf%s \"_ZGV%sN%s%s_%sf_u%d\"\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name, funcList[i].ulp); printf("#define __%sf%s_finite _ZGV%sN%s%s___%sf_finite\n", funcList[i].name, ulpSuffixStr[funcList[i].ulpSuffix], mangledisa, wsp, vparameterStrSP[funcList[i].funcType], funcList[i].name); } } exit(0); } sleef-3.3.1/src/libm/norename.h000077500000000000000000000110001333715643700163120ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include #ifdef ENABLE_DP #ifdef ENABLE_SVE typedef __sizeless_struct vdouble2 { svfloat64_t x; svfloat64_t y; } vdouble2; #else typedef struct { vdouble x, y; } vdouble2; #endif vdouble xldexp(vdouble x, vint q); vint xilogb(vdouble d); vdouble xsin(vdouble d); vdouble xcos(vdouble d); vdouble2 xsincos(vdouble d); vdouble xtan(vdouble d); vdouble xasin(vdouble s); vdouble xacos(vdouble s); vdouble xatan(vdouble s); vdouble xatan2(vdouble y, vdouble x); vdouble xlog(vdouble d); vdouble xexp(vdouble d); vdouble xpow(vdouble x, vdouble y); vdouble xsinh(vdouble d); vdouble xcosh(vdouble d); vdouble xtanh(vdouble d); vdouble xsinh_u35(vdouble d); vdouble xcosh_u35(vdouble d); vdouble xtanh_u35(vdouble d); vdouble xasinh(vdouble s); vdouble xacosh(vdouble s); vdouble xatanh(vdouble s); vdouble xcbrt(vdouble d); vdouble xexp2(vdouble a); vdouble xexp10(vdouble a); vdouble xexpm1(vdouble a); vdouble xlog10(vdouble a); vdouble xlog2(vdouble a); vdouble xlog1p(vdouble a); vdouble xsin_u1(vdouble d); vdouble xcos_u1(vdouble d); vdouble2 xsincos_u1(vdouble d); vdouble xtan_u1(vdouble d); vdouble xasin_u1(vdouble s); vdouble xacos_u1(vdouble s); vdouble xatan_u1(vdouble s); vdouble xatan2_u1(vdouble y, vdouble x); vdouble xlog_u1(vdouble d); vdouble xcbrt_u1(vdouble d); vdouble2 xsincospi_u05(vdouble d); vdouble2 xsincospi_u35(vdouble d); vdouble xsinpi_u05(vdouble d); vdouble xcospi_u05(vdouble d); vdouble xldexp(vdouble, vint); vint xilogb(vdouble); vdouble xfma(vdouble, vdouble, vdouble); vdouble xsqrt(vdouble); vdouble xsqrt_u05(vdouble); vdouble xsqrt_u35(vdouble); vdouble xhypot_u05(vdouble, vdouble); vdouble xhypot_u35(vdouble, vdouble); vdouble xfabs(vdouble); vdouble xcopysign(vdouble, vdouble); vdouble xfmax(vdouble, vdouble); vdouble xfmin(vdouble, vdouble); vdouble xfdim(vdouble, vdouble); vdouble xtrunc(vdouble); vdouble xfloor(vdouble); vdouble xceil(vdouble); vdouble 
xround(vdouble); vdouble xrint(vdouble); vdouble xnextafter(vdouble, vdouble); vdouble xfrfrexp(vdouble); vint xexpfrexp(vdouble); vdouble xfmod(vdouble, vdouble); vdouble2 xmodf(vdouble); vdouble xlgamma_u1(vdouble); vdouble xtgamma_u1(vdouble); vdouble xerf_u1(vdouble); vdouble xerfc_u15(vdouble); #endif // #ifdef ENABLE_SP #ifdef ENABLE_SVE typedef __sizeless_struct vfloat2 { svfloat32_t x; svfloat32_t y; } vfloat2; #else typedef struct { vfloat x, y; } vfloat2; #endif vfloat xldexpf(vfloat x, vint2 q); vint2 xilogbf(vfloat d); vfloat xsinf(vfloat d); vfloat xcosf(vfloat d); vfloat2 xsincosf(vfloat d); vfloat xtanf(vfloat d); vfloat xasinf(vfloat s); vfloat xacosf(vfloat s); vfloat xatanf(vfloat s); vfloat xatan2f(vfloat y, vfloat x); vfloat xlogf(vfloat d); vfloat xexpf(vfloat d); vfloat xcbrtf(vfloat s); vfloat xpowf(vfloat x, vfloat y); vfloat xsinhf(vfloat x); vfloat xcoshf(vfloat x); vfloat xtanhf(vfloat x); vfloat xsinhf_u35(vfloat x); vfloat xcoshf_u35(vfloat x); vfloat xtanhf_u35(vfloat x); vfloat xasinhf(vfloat x); vfloat xacoshf(vfloat x); vfloat xatanhf(vfloat x); vfloat xexp2f(vfloat a); vfloat xexp10f(vfloat a); vfloat xexpm1f(vfloat a); vfloat xlog10f(vfloat a); vfloat xlog2f(vfloat a); vfloat xlog1pf(vfloat a); vfloat xsinf_u1(vfloat d); vfloat xcosf_u1(vfloat d); vfloat2 xsincosf_u1(vfloat d); vfloat xtanf_u1(vfloat d); vfloat xasinf_u1(vfloat s); vfloat xacosf_u1(vfloat s); vfloat xatanf_u1(vfloat s); vfloat xatan2f_u1(vfloat y, vfloat x); vfloat xlogf_u1(vfloat d); vfloat xcbrtf_u1(vfloat s); vfloat2 xsincospif_u05(vfloat d); vfloat2 xsincospif_u35(vfloat d); vfloat xsinpif_u05(vfloat d); vfloat xcospif_u05(vfloat d); vfloat xldexpf(vfloat, vint2); vint2 xilogbf(vfloat); vfloat xfmaf(vfloat, vfloat, vfloat); vfloat xsqrtf(vfloat s); vfloat xsqrtf_u05(vfloat s); vfloat xsqrtf_u35(vfloat s); vfloat xhypotf_u05(vfloat, vfloat); vfloat xhypotf_u35(vfloat, vfloat); vfloat xfabsf(vfloat); vfloat xcopysignf(vfloat, vfloat); vfloat xfmaxf(vfloat, 
vfloat); vfloat xfminf(vfloat, vfloat); vfloat xfdimf(vfloat, vfloat); vfloat xtruncf(vfloat); vfloat xfloorf(vfloat); vfloat xceilf(vfloat); vfloat xroundf(vfloat); vfloat xrintf(vfloat); vfloat xnextafterf(vfloat, vfloat); vfloat xfrfrexpf(vfloat); vint2 xexpfrexpf(vfloat); vfloat xfmodf(vfloat, vfloat); vfloat2 xmodff(vfloat); vfloat xlgammaf_u1(vfloat); vfloat xtgammaf_u1(vfloat); vfloat xerff_u1(vfloat); vfloat xerfcf_u15(vfloat); #endif sleef-3.3.1/src/libm/rempitab.c000066400000000000000000003461721333715643700163260ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #include "misc.h" NOEXPORT ALIGNED(64) const double rempitabdp[] = { 0.15915494309189531785, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 0.03415494309189533173, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, 0.0029049430918953351999, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.496415728504571394e-51, 0.00095181809189533563356, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762436344e-52, 0.00095181809189533563356, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762436344e-52, 0.00046353684189533574198, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.301187206862134399e-54, 0.00021939621689533574198, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.301187206862134399e-54, 
9.7325904395335769087e-05, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 3.6290748145335769087e-05, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.7731700203357690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 1.9584727547107690874e-06, -2.0362228529073840241e-22, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 5.1124121898268875627e-08, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369025999e-57, 2.1321799510573569745e-08, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369025999e-57, 6.4206383167259151492e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, 6.4206383167259151492e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, 2.6953480182640010867e-09, -1.3585460269359374382e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, 8.3270286903304384868e-10, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 8.3270286903304384868e-10, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 3.6704158172530459087e-10, 
7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 1.3421093807143501366e-10, 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 1.7795616244500218596e-11, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 3.2437010161333667893e-12, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 3.2437010161333667893e-12, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 3.2437010161333667893e-12, -1.452834466126541428e-28, -1.5869767474823787636e-44, -2.6168913164368963837e-61, 1.4247116125875099096e-12, 2.5861333686050385673e-28, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 5.1521691081458187359e-13, 5.6664945123924856962e-29, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 6.0469559928117805118e-14, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, 3.6261410673097965595e-15, -1.3304005198798645927e-31, -1.7578597149294783985e-47, 8.4432539107728104262e-64, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, 
-1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 7.3427388509295482183e-17, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659578102e-65, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, 1.7916237278037667488e-17, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, 4.0384494702232122736e-18, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 8.7154294504188129325e-66, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 5.6900251826959904774e-19, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762432635e-52, 3.5887057810247033998e-68, 1.3532164927539732229e-19, -6.4410794381603004826e-36, 1.7634898158762432635e-52, 3.5887057810247033998e-68, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.6901432026846872871e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 1.3348904870778067446e-20, -4.2254836195018827479e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 6.5726412927436632287e-21, 1.0820844071023395684e-36, 1.7634898158762432635e-52, 
3.5887057810247033998e-68, 3.1845095037264626247e-21, 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 1.4904436092178623228e-21, -4.6390169687056261795e-38, -1.1392999419355048437e-54, -4.587677453735884283e-71, 6.4341066196356198368e-22, -4.6390169687056261795e-38, -1.1392999419355048437e-54, -4.587677453735884283e-71, 2.1989418833641172011e-22, 4.7649378378726728402e-38, 9.3011872068621332399e-54, 1.113250147552460308e-69, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 8.135951522836682362e-24, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 1.5185066224124613304e-24, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 6.9132600985943383921e-25, 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 2.7773570358292009361e-25, -1.3244127270701094468e-41, -2.4695541513869446866e-57, -3.2399200798614356002e-74, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 7.0940550444663151936e-26, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 1.9241762160098927996e-26, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 6.317065088957874881e-27, 
-3.2976062348358281152e-43, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 6.317065088957874881e-27, -3.2976062348358281152e-43, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 3.0858908211726098086e-27, 3.8770419025072344914e-43, 7.9392906424978921242e-59, 2.9745456030524896742e-75, 1.4703036872799779898e-27, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 6.625101203336619011e-28, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 2.5861333686050385673e-28, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 5.6664945123924856962e-29, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 5.6664945123924856962e-29, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008290615e-78, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 6.1778471897801070206e-30, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 3.0224035688960604996e-30, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 1.4446817584540368888e-30, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008290615e-78, 6.5582085323302525856e-31, 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639313137e-79, 2.6139040062251944343e-31, -1.7578597149294783985e-47, 8.4432539107728090768e-64, 1.9517662449371102229e-79, 6.4175174317266470186e-32, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371102229e-79, 6.4175174317266470186e-32, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371102229e-79, 1.4871367740953237822e-32, -1.1571307704883330232e-48, 
-6.7249112515659569668e-65, -7.2335760163150273591e-81, 1.4871367740953237822e-32, -1.1571307704883330232e-48, -6.7249112515659569668e-65, -7.2335760163150273591e-81, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.5454160968749269937e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 1.0046721413651383112e-33, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.3430016361024414106e-34, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.3430016361024414106e-34, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, 4.1707169171520598517e-35, -2.4964157285045710972e-51, -1.866653112309982615e-67, 1.4185069655957361252e-83, 1.7633044866680145008e-35, 2.8491136916798196016e-51, 4.0680767287898916022e-67, 1.4185069655957361252e-83, 5.595982714259923599e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, 5.595982714259923599e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, 2.5867171761548675786e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, 1.0820844071023395684e-36, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085140685e-84, 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, 3.2976802257607573031e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, 1.4168892644450972904e-37, 9.3011872068621332399e-54, 1.113250147552460308e-69, 
2.9286284920280944778e-86, 4.7649378378726728402e-38, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280944778e-86, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 6.2960434583523738135e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 2.6226236120327253511e-40, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 7.8591368887290111994e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 3.2673620808294506214e-41, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.524218473063975309e-90, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 9.7147467687967058732e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 3.9750282589222551507e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 1.1051690039850297894e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 1.1051690039850297894e-42, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 3.8770419025072344914e-43, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257943935e-91, 2.8971783383570358633e-44, 
-2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.8971783383570358633e-44, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 6.5510079543732854985e-45, -2.6168913164368963837e-61, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 9.4581409707401690366e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 2.451648649116083682e-46, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.5355611056488084652e-94, 7.0002556871006273225e-47, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.5355611056488084652e-94, 2.6211979860855749482e-47, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 4.3166913557804827486e-48, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 1.5797802926460750146e-48, 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 
2.8738690232659205689e-99, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 2.1132476107887107169e-49, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, 4.0267819632970559834e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, 1.8885701952232994665e-50, -7.8013829534098555144e-67, -1.1759240463442418271e-82, 2.8738690232659205689e-99, 8.1946431118642097069e-51, 1.5937536410989638719e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, 2.8491136916798196016e-51, 4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, 1.7634898158762432635e-52, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142808004e-99, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 9.3011872068621332399e-54, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, 4.0809436324633147776e-54, -4.587677453735884283e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, 
1.470821845263904967e-54, -4.587677453735884283e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, 1.6576095166419998917e-55, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630537605e-103, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 2.6283399642369020339e-57, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.145584788913072936e-105, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 7.9392906424978921242e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 3.9565608646667614317e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 1.9651959757511960854e-59, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.554706987098633963e-107, 9.6951353129341363331e-60, 7.6368645294831185015e-76, 
1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.7167230906452229674e-60, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 2.2275169795007668372e-60, 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 9.8291392392853877215e-61, -6.5385728340754726503e-77, -1.3520652573660833788e-93, -2.3220403312043059402e-109, 3.6061239614242446325e-61, 7.2792968540756372162e-77, 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 4.9461632249367446986e-62, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.535561105648808199e-94, -1.9306041120023063932e-110, 1.0567786762735315635e-62, -6.1446417754639301152e-79, -1.535561105648808199e-94, -1.9306041120023063932e-110, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, 8.4432539107728090768e-64, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514358328e-112, 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 2.3660905534865399025e-64, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.4679971416497210292e-65, -7.2335760163150273591e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 
1.8395411057335783574e-115, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 8.7154294504188118783e-66, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 3.9676455775389135587e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 1.5937536410989638719e-66, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, 4.0680767287898916022e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, 1.1007118082399544936e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, 1.1007118082399544936e-67, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894110579e-116, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, 3.588705781024702988e-68, 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, 1.7341027056809927069e-68, 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418876704e-116, 8.0680116800913756637e-69, -2.2809159455312046184e-85, -4.0748824503880445403e-101, -6.3915272253158644628e-117, 3.4315039917320989315e-69, -2.2809159455312046184e-85, -4.0748824503880445403e-101, -6.3915272253158644628e-117, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, 1.113250147552460308e-69, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, 5.3368668650755071652e-70, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, 2.4390495598509592076e-70, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, 9.901409072386855505e-71, -2.8859500138942368532e-87, -5.6567402911297190423e-103, 
-4.6672632026740766185e-119, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 2.6568658093254848067e-71, 5.1571087196495574384e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 8.4572999356014273536e-72, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 8.4572999356014273536e-72, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 3.9294603961880721752e-72, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894729832e-121, 1.6655406264813940833e-72, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894729832e-121, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 5.3358074162805516304e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 2.5059077041472040156e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 1.0909578480805302081e-73, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 3.8348292004719330442e-74, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598455046e-121, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 2.9745456030524891833e-75, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 7.6368645294831185015e-76, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 
4.2386081393205242443e-125, 2.1097166542226745549e-76, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 7.2792968540756372162e-77, 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 3.7036201000008285821e-78, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251472933e-126, 1.5445779612272179051e-78, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, 4.6505689184041232695e-79, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, 4.6505689184041232695e-79, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251472933e-126, 1.9517662449371099233e-79, 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, 6.0236490820360325022e-80, -3.7424672147304925625e-96, -1.784871512364483542e-112, 6.7095375687163151728e-129, 6.0236490820360325022e-80, -3.7424672147304925625e-96, -1.784871512364483542e-112, 6.7095375687163151728e-129, 2.6501457402022643213e-80, 3.7482149527770239293e-96, 6.5314563001514349095e-112, 9.9039323746573674262e-128, 9.6339406928538097998e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.2001823382693912203e-81, 2.8738690232659205689e-99, 1.8395411057335783574e-115, 
-7.8150389500644475446e-132, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.459625439463388979e-82, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, 1.4185069655957361252e-83, -7.8369062883735917115e-100, -1.9081236411894107761e-116, -2.1796760241698337334e-132, 5.9489775128085131541e-84, 1.0450891972142805974e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, 1.830931441234090934e-84, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, 8.0141992334048515034e-85, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247800778e-132, 2.8666416439368237283e-85, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.9286284920280941206e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 1.3200167453193350837e-86, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 5.1571087196495574384e-87, 3.2728487032630532648e-103, 
5.2465720993401781599e-119, -3.755176715260116501e-136, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 1.1355793528776598461e-87, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, -3.1562414818576682143e-137, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, -3.1562414818576682143e-137, 1.3019701118468578292e-88, -7.5747169634236195447e-105, -2.0152904854894725532e-121, -3.1562414818576682143e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 4.5242184730639744369e-90, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852683481e-137, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, 5.969437008257942845e-91, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534162772e-139, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 1.0603435429602168369e-91, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 4.4670685979800101779e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 1.3988851821689310822e-92, 
1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 1.3988851821689310822e-92, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 6.3183932821616130831e-93, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591188256e-141, 2.4831640123977650651e-93, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007823264e-142, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 5.6554937751584084315e-94, -1.9306041120023063932e-110, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.6145718795359707834e-95, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, 2.62202614552995759e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, 1.1238897120284541253e-95, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, 3.7482149527770239293e-96, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868987041e-145, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 
2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.8738690232659205689e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 1.0450891972142805974e-99, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, 1.3069928418846076386e-100, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.5448727249069983612e-148, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 1.6400545060233297363e-101, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 2.1132026692048600853e-102, -4.6672632026740766185e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 3.2728487032630532648e-103, 5.2465720993401781599e-119, 
-3.755176715260116501e-136, 2.1571619860435652883e-152, 3.2728487032630532648e-103, 5.2465720993401781599e-119, -3.755176715260116501e-136, 2.1571619860435652883e-152, 1.0404514546648604359e-103, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, 1.0404514546648604359e-103, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, 4.8235214251531210473e-104, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, 2.0330248644053793915e-104, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435652883e-152, 6.3777658403150887343e-105, -2.0152904854894725532e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, 6.3777658403150887343e-105, -2.0152904854894725532e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, 2.88964513938041089e-105, 5.7298933442091639924e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, 1.1455847889130727424e-105, 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, 2.7355461367940366859e-106, -7.8994528064813712419e-123, -2.0037599452814940222e-138, 9.1598554579059548847e-155, 2.7355461367940366859e-106, -7.8994528064813712419e-123, -2.0037599452814940222e-138, 9.1598554579059548847e-155, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, 5.5547069870986327528e-107, 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 
4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.0451839188820145747e-108, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, 1.9359195088038447797e-109, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, 8.7142954880180709975e-110, -4.8867691298577234423e-126, -2.0587960670007819622e-142, -2.8326669474241479263e-158, 3.3918456880078814158e-110, 6.931443500908017045e-126, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220312367e-159, 7.3062078800278780675e-111, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220312367e-159, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, 6.5314563001514349095e-112, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657616072e-160, 2.3732923938934761454e-112, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 2.9421044076449630171e-113, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 
1.7950099192230045857e-161, 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, 3.4325196623373878948e-114, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 1.8395411057335783574e-115, -7.8150389500644475446e-132, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 8.2436437080731844263e-116, 1.4726412753514008951e-131, -3.9681466199873824165e-148, 2.9106774506606945839e-164, 3.1677600334418871069e-116, 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942429241e-163, 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, 6.2981819612623816536e-117, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254779927e-164, 3.1257546646178208289e-117, -6.6414926959353515111e-134, -5.7828074707888119584e-150, -1.2825052715093464343e-165, 1.5395410162955400644e-117, -6.6414926959353515111e-134, -5.7828074707888119584e-150, -1.2825052715093464343e-165, 7.4643419213439950602e-118, 1.0969016447485317626e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, 3.4988078005382940294e-118, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.5160407401354430737e-118, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 5.2465720993401781599e-119, 
-3.755176715260116501e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 2.896544483330507019e-120, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 1.3475077173907800538e-120, -3.156241481857667737e-137, -7.0684085473731388916e-153, -3.3573283875161501977e-170, 5.7298933442091639924e-121, -3.156241481857667737e-137, -7.0684085473731388916e-153, -3.3573283875161501977e-170, 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 1.8573014293598452896e-121, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 8.8915345064751572143e-122, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 4.0507946129135104481e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, 1.6304246661326865276e-122, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, 4.2023969274227456735e-123, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, 4.2023969274227456735e-123, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911825673e-172, 1.1769344939467164447e-123, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, 1.1769344939467164447e-123, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, 4.2056888557770896953e-124, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064369683e-172, 
4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 4.2386081393205242443e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 1.8749656131673758844e-125, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 6.931443500908017045e-126, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, 1.0223371855251471293e-126, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, 2.8369889610228834887e-127, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, 2.8369889610228834887e-127, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, 9.9039323746573674262e-128, -8.6629775332868972816e-145, -1.5987060076657612913e-160, -2.5389576707476506925e-176, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, 6.7095375687163138915e-129, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.197724948400014906e-177, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691155518e-177, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, 
-1.6991004655691155518e-177, 9.3892593260023063019e-130, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691155518e-177, 2.175994780857201024e-130, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 2.175994780857201024e-130, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 3.7267864457092460442e-131, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 3.7267864457092460442e-131, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 3.7267864457092460442e-131, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 1.4726412753514008951e-131, -3.9681466199873824165e-148, 2.9106774506606941983e-164, 5.1948630316441296498e-180, 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 3.4556869017247794521e-132, 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, 6.3800543877747317218e-133, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795152755e-180, 2.8579525590905986764e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, -1.0696067158221530218e-181, 1.0969016447485317626e-133, -5.7828074707888119584e-150, -1.2825052715093464343e-165, -1.0696067158221530218e-181, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 2.1637618757749825688e-134, -8.9490928918944555247e-151, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 1.0631050543111905033e-134, 1.5490398016102376505e-150, 
3.4549185946116918017e-166, 1.3535321672928907047e-182, 5.1277664357929471499e-135, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 2.3761243821334675971e-135, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 1.0003033553037281263e-135, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 3.1239284188885823808e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 1.4041521353514076604e-136, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, 5.4426399358282049106e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.1431992269852681095e-137, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, 6.8339049774534147855e-139, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328578981e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 
3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1602886988632691941e-140, 3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.1062055705591186799e-141, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 4.5016298192952031469e-142, -2.8326669474241479263e-158, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 1.2214168761472102282e-142, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, 4.0136364036021218058e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, 1.9635033141346264592e-143, -1.0134099605688458828e-159, -2.5389576707476506925e-176, -6.2404128071707654958e-193, 9.3843676940087855824e-144, 1.2626949989038732076e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, 4.2590349703400483539e-144, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896458822e-192, 1.6963686085056791706e-144, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896458822e-192, 4.1503542758849472122e-145, -1.7614040799531193879e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, 4.1503542758849472122e-145, -1.7614040799531193879e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691153326e-177, 
-1.856794109153959173e-193, 9.4702132359198537748e-146, 1.7950099192230045857e-161, -1.6991004655691153326e-177, -1.856794109153959173e-193, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 1.4618808551874518553e-146, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 4.6083930759590139305e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 2.105789206980137775e-147, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 8.544872724906996972e-148, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, 2.2883630524598079723e-148, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091032843e-196, 2.2883630524598079723e-148, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091032843e-196, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.1067843414450286726e-196, 7.2423563434801054878e-149, 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.1067843414450286726e-196, 3.3320377982006123631e-149, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 1.3768785255608653665e-149, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.9929888924099219388e-150, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 3.9929888924099219388e-150, -1.9717385086233606481e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 1.5490398016102376505e-150, 3.4549185946116918017e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 
1.3535321672928907047e-182, 3.1205762277848031878e-199, 3.2706525621039604902e-151, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.1571619860435648643e-152, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, 2.4782675885631257398e-153, -3.3573283875161501977e-170, 3.0568054078295488291e-186, 1.4980560800565462618e-202, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 9.1598554579059548847e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 1.7015147267057481414e-155, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 7.6922213530572229852e-156, -4.5159745404911819927e-172, -4.5870810097328572602e-188, -3.2905064432040069127e-204, 
3.0307583960570927356e-156, 5.8345524661064358191e-172, 6.9043123899963188689e-188, -3.2905064432040069127e-204, 7.0002691755702864582e-157, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, 7.0002691755702864582e-157, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 1.1734404793201255869e-157, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 4.4508689228885539715e-158, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 8.0910098773220302259e-159, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.3321093418096261919e-207, 3.5387999583765925506e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.3321093418096261919e-207, 1.2626949989038732076e-159, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.3321093418096261919e-207, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 1.2464251916751375716e-160, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 5.3514239183991277695e-161, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, 1.7950099192230045857e-161, -1.6991004655691153326e-177, 
-1.8567941091539589297e-193, -1.8074851186411640793e-209, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 1.6802919634942426156e-163, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, 2.9106774506606941983e-164, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189357559e-211, 1.1741471776254777999e-164, 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756583552e-212, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 3.0588204110786950436e-165, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 8.8815756978467430465e-166, 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 8.8815756978467430465e-166, 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 3.4549185946116918017e-166, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, 7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, 
7.4159004299416557678e-167, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 6.3257905089784152346e-168, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 2.0862146470760309789e-168, -1.146150630053972131e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 2.0862146470760309789e-168, -1.146150630053972131e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 1.026320681600434562e-168, 1.2072867382105631402e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, 4.9637369886263658882e-169, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 2.3140020749373754342e-169, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 9.8913461809288020723e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 3.2670088967063259373e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 3.2670088967063259373e-170, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 1.6109245756507072713e-170, -6.2044048008378732802e-187, -5.4322544592823556944e-203, 4.2491789852161138683e-219, 7.8288241512289757055e-171, 1.2181824638728806485e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, 3.6886133485899290404e-171, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161138683e-219, 1.6185079472704052482e-171, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161138683e-219, 5.8345524661064358191e-172, 6.9043123899963188689e-188, -3.2905064432040069127e-204, 
-9.1795828160190082842e-224, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, 6.5928896280762691321e-173, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.2381024895275844856e-174, -8.4789520282639751913e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 2.2730883653953564668e-175, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190082842e-224, 1.0095962991602958391e-175, -6.2404128071707654958e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 3.7785026604276538491e-176, -6.2404128071707654958e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, 6.1977249484000140293e-177, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578162463e-225, 2.2493122414154495675e-177, 
2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, 2.7510588792316711745e-178, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450500218e-227, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 2.8330093736631818036e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 1.2906606599973359683e-179, -7.4549709281190454638e-196, -1.4481306607622412036e-212, 9.9192633285681635836e-229, 5.1948630316441287936e-180, 9.6685396110091013832e-196, 1.7562785002189355449e-211, 1.6821693549018732055e-227, 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756578275e-212, 6.2685154049107876715e-228, 1.3389912474795150614e-180, 1.106784341445028435e-196, 3.3045982549756578275e-212, 6.2685154049107876715e-228, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, 3.7502330143836152136e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, 1.3403131492807310959e-181, 3.6564932749519464998e-198, 3.7097125405852507464e-214, 2.5658818466966882188e-231, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, 
-1.0577661142165146927e-233, 1.3535321672928907047e-182, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 6.0043220944823941786e-183, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 2.2388223052591377446e-183, 3.1205762277848031878e-199, -3.3569248349832580936e-217, -1.0577661142165146927e-233, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.5607241064750984115e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 1.2072867382105631402e-184, -1.4832196127821708615e-201, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 3.0568054078295488291e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 1.2181824638728806485e-186, 1.4980560800565460352e-202, 2.6911956484118910092e-218, -5.1336618966962585332e-235, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161132393e-219, 7.4467067939231424594e-235, 2.9887099189454666024e-187, 4.774153170641553462e-203, 4.2491789852161132393e-219, 7.4467067939231424594e-235, 6.9043123899963188689e-188, -3.2905064432040069127e-204, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 6.9043123899963188689e-188, 
-3.2905064432040069127e-204, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.1586156901317304854e-188, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 4.4040360264865697732e-189, -1.0100405885278530137e-205, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 8.129755890712020335e-190, 9.8339840169166049336e-206, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 3.6409303439428119063e-190, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.3965175705582071936e-190, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 2.7431118386590483722e-191, -1.332109341809626019e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 1.3403538552936701153e-191, 1.7826390804083638359e-207, -9.1795828160190063645e-224, -2.3569545504732004486e-239, 6.389748636109812983e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, 2.8828536776963681193e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, 1.1294061984896456875e-192, 2.2526486929936882202e-208, -5.3441928036578156465e-225, -7.741539335184153052e-241, 2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 
4.2560351759808952526e-241, 2.5268245888628466632e-193, 3.0593092910744445285e-209, 5.4622616159087170031e-225, 4.2560351759808952526e-241, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, 3.3501523985444386676e-194, 6.2591208621664049475e-210, 5.9034406125450490845e-227, 1.3186893776791012681e-242, 6.1039071228393547627e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 6.1039071228393547627e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 6.1039071228393547627e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 2.6792050150137250131e-195, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 9.6685396110091013832e-196, 1.7562785002189355449e-211, 1.6821693549018732055e-227, -8.7276385348052817035e-244, 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, 2.0416567491425607157e-177, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 6.7450395650278649168e-179, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, 5.756447103644822603e-180, -6.1924333305615830735e-198, 
-1.9512340798794268979e-214, -3.6162764918921697356e-230, 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 5.756447103644822603e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 1.9005753194802080146e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 1.9005753194802080146e-180, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 9.3660737343905436753e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 4.5462340041847754398e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 2.1363141390818913221e-181, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 9.3135420653044926323e-182, -6.1924333305615830735e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 3.2887424025472810002e-182, 7.185309278132283136e-198, -1.9512340798794268979e-214, -3.6162764918921697356e-230, 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, 2.7634257116867652192e-183, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749095611e-233, 8.806758170751374203e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, 8.806758170751374203e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, 4.0998834342223036605e-184, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749095611e-233, 
1.7464460659577689118e-184, 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749095611e-233, 5.697273818255015375e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 5.697273818255015375e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 2.755477107924346286e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 1.2845787527590117414e-185, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 5.4912957517634446918e-186, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 1.8140498638501083305e-186, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 1.8140498638501083305e-186, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 8.9473839187177424013e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 4.3508265588260719497e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 2.0525478788802367239e-187, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 9.0340853890731911095e-188, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 3.288388689208603045e-188, -1.6933341491052464293e-204, -4.3478137385944270631e-220, -2.3353910329236990725e-236, 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, 4.1554033927630885323e-189, -9.8582956929636044137e-206, -1.4280619485269765742e-221, 1.2171222696290252021e-237, 5.643429553477207926e-190, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, 5.643429553477207926e-190, 1.0076094209231528444e-205, 
7.8509991660024955813e-222, 1.2171222696290252021e-237, 5.643429553477207926e-190, 1.0076094209231528444e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, 1.1546040067079994973e-190, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 1.1546040067079994973e-190, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 3.2397620015697148712e-192, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436627876e-240, 1.4863145223629928288e-192, -7.9038076992129241506e-209, -1.609965144193984205e-224, -1.8313007053436627876e-240, 6.0959078275963141821e-193, 1.156336993964950812e-208, 2.7126166236326293347e-224, -1.8313007053436627876e-240, 1.712289129579509076e-193, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, 1.712289129579509076e-193, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, 6.1638445507530779946e-194, -6.0361608463951204924e-210, 1.1003018740995688645e-226, 5.827891678485165325e-243, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 6.8432117823206978686e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 6.8432117823206978686e-195, 
4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 3.418509674495068119e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 1.7061586205822532442e-195, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 8.499830936258458068e-196, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 4.218953301476420881e-196, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.029900079464340522e-245, 2.0785144840854027628e-196, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 1.008295075389893466e-196, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 4.7318537104213881764e-197, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 2.0563051886826149345e-197, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 7.185309278132283136e-198, -1.9512340798794268979e-214, -3.6162764918921692779e-230, -2.8387319855193022476e-246, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 4.9643797378534984559e-199, -9.4699347169310243473e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 7.8383517263666503337e-200, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, 
-1.4042876247421728101e-248, 2.612671019845610006e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 1.306250843215349634e-200, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421728101e-248, 6.5304075490021959302e-201, 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, 3.2643571074265457254e-201, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, 1.6313318866387202604e-201, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, 8.1481927624480752786e-202, -4.2219277387461470355e-218, -1.753154605289404553e-234, -7.5861268822635538093e-251, 4.0656297104785107096e-202, 4.8431832608149701961e-218, 8.3111403472061145651e-234, 1.6001805286092554504e-249, 2.0243481844937293316e-202, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, 1.0037074215013384159e-202, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, 4.9338704000514295811e-203, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, 2.3822684925704522921e-203, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, 1.1064675388299639308e-203, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608782288e-251, 4.6856706195971960852e-204, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608782288e-251, 1.4961682352459748279e-204, -8.0675475439086544798e-221, -3.6970842501441777651e-237, -5.7032870362481275794e-253, 1.4961682352459748279e-204, -8.0675475439086544798e-221, -3.6970842501441777651e-237, -5.7032870362481275794e-253, 6.9879263915816924805e-205, 9.6377473771091526132e-221, 1.5959741828948633012e-236, 2.7031904319843495713e-252, 3.0010484111426663515e-205, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, 1.0076094209231528444e-205, 
7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 1.0889925813396166947e-207, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, 3.1030547578511949035e-208, -1.609965144193984205e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, 1.156336993964950812e-208, 2.7126166236326293347e-224, -1.8313007053436625212e-240, -2.3341145329525059632e-256, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, 1.8297811202182925249e-209, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, 6.1308251778939023781e-210, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.1174271110208206547e-259, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, 
-2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 4.7332165749391048364e-212, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 2.3568521170701555846e-212, -7.7818310317651142243e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 1.1686698881356804311e-212, 1.8601114328504743806e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 5.7457877366844311816e-213, 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, 2.7753321643482446169e-213, -1.1860946916976500828e-229, 6.3146909508553973881e-246, 1.2573885592501532045e-261, 1.290104378180150675e-213, 2.1117734783360818049e-229, 4.2928382696354204061e-245, -2.8075477999879273582e-261, 5.4749048509610403382e-214, 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501532045e-261, 1.7618353855408067201e-214, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 1.7618353855408067201e-214, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.3356801918574821257e-215, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 3.6943433600821895879e-215, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 1.3736749441945438342e-215, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, 2.1334073625072069974e-216, 
-9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, 2.1334073625072069974e-216, -9.2331809177749077733e-233, -1.4042876247421726117e-248, -9.9505977179164858712e-265, 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, 6.8298960257742791824e-217, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, 3.2038516259498326923e-217, -1.1817449557784924788e-233, -6.3454186796659920093e-250, -2.6436684620390282645e-267, 1.3908294260376086421e-217, 2.8439730252197153919e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, 4.8431832608149701961e-218, 8.3111403472061145651e-234, 1.6001805286092554504e-249, -2.6436684620390282645e-267, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, 3.1062776103441183191e-219, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, 2.7343042298126957741e-220, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, 9.6377473771091526132e-221, 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844372114e-268, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, 
-1.2030990169203137715e-270, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, 7.8509991660024955813e-222, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, 2.318094503184431479e-222, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 2.318094503184431479e-222, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 9.3486833747991514629e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 2.4325525462765697993e-223, -1.1429360314275701698e-239, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 7.0351983914592419146e-224, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 7.0351983914592419146e-224, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539155726e-272, 2.7126166236326293347e-224, -1.8313007053436625212e-240, -2.3341145329525056675e-256, -2.0046830753539155726e-272, 5.5132573971932232487e-225, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, 5.5132573971932232487e-225, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 1.1003018740995688645e-226, 
5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 2.560476225709334075e-227, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 2.560476225709334075e-227, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 4.4984059688774601837e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 1.8601114328504743806e-228, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 5.409641648369814791e-229, -3.0299000794643401155e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 2.1117734783360818049e-229, 4.2928382696354204061e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 4.6283939331921604413e-230, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, 5.060587206499956961e-231, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580820317e-280, 2.4841276986611042098e-231, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, 1.1958979447416775482e-231, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, 
5.5178306778196421733e-232, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, 2.2972562930210755192e-232, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 6.8696910062179237095e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 2.8439730252197153919e-233, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 8.3111403472061145651e-234, 1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 8.3111403472061145651e-234, 1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 3.2789928709583552854e-234, 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 7.6291913283447536617e-235, 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, 1.3390069830350552605e-235, -6.026193929640082176e-252, -7.0535576022338457803e-268, -4.3807022524130141006e-284, 5.5273393987134252385e-236, 1.1432574793608780349e-251, 1.2329569415922591084e-267, -4.3807022524130141006e-284, 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, 1.5959741828948633012e-236, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, 6.1313287894022281692e-237, 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006739096e-285, 1.2171222696290252021e-237, -2.4742181023285720738e-254, 
-1.2030990169203137715e-270, -9.5347405022956042207e-287, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, 1.2171222696290252021e-237, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, 6.0284645465737476297e-238, -2.4742181023285720738e-254, -1.2030990169203137715e-270, -9.5347405022956042207e-287, 2.9570854717154947523e-238, 4.3456134301905148502e-254, 6.3684349745470443788e-270, -9.5347405022956042207e-287, 1.4213959342863689955e-238, 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956042207e-287, 6.5355116557180594664e-239, 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956042207e-287, 2.6962878121452450746e-239, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 7.766758903588374524e-240, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.9677290991223565342e-240, -2.3341145329525056675e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 5.6821419688934674008e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 2.6827483411022054912e-241, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 1.1830515272065748694e-241, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 4.3320312025875939195e-242, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 
5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 5.827891678485165325e-243, -3.117427111020820077e-259, -5.9718623963762788119e-275, 6.1155422068568954053e-291, 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, 1.1413391350613183311e-243, -5.1586784110844895013e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, 5.5552006713333735927e-244, 7.8491179384773690214e-260, -1.9524039360882352712e-276, -2.9779654517181717279e-292, 2.6261053316934700345e-244, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997740506e-292, 1.1615576618735179302e-244, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997740506e-292, 4.2928382696354204061e-245, -2.8075477999879273582e-261, -1.472095602234059958e-277, 2.8287088295287585094e-294, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, 6.3146909508553973881e-246, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, 1.7379794826680480784e-246, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, 1.7379794826680480784e-246, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, 5.9380161562121075096e-247, -1.2904053011746964278e-263, 8.7279092175580810531e-280, 8.8634899828990930877e-296, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 2.1712682097791944335e-248, 2.9746046415267896827e-264, 
-8.6516445844406224413e-282, -5.0528699238150276549e-299, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 2.1712682097791944335e-248, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150276549e-299, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 3.8349029251851101018e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 1.6001805286092554504e-249, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 4.8281933032132812475e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 2.0347903074934629333e-250, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 6.3808880963355377617e-251, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 6.3808880963355377617e-251, -2.6436684620390282645e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 2.8891343516857640937e-251, 5.1095823452235464813e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 1.1432574793608780349e-251, 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, 2.7031904319843490867e-252, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 
5.2084434157824127104e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 2.4805108027747776379e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 1.1165444962709601017e-253, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482777461e-301, 4.3456134301905148502e-254, 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 9.3569766393097138822e-255, 2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 8.3218722366085688343e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 2.9938788518280315834e-256, -2.0046830753539152442e-272, -3.4057806738724185961e-288, 2.3458177946667328156e-304, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 3.2988215943776273615e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 1.6338236616337094706e-257, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, 8.0132469526175071002e-258, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 3.850752120757712373e-258, 2.8687869620228451614e-274, 
-1.9537812801257956865e-290, 1.0380272777574237546e-306, 1.7695047048278150093e-258, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 7.2888099686286655858e-259, 5.581381609158630475e-275, 6.1155422068568946933e-291, 1.0380272777574237546e-306, 2.0856914288039227544e-259, -1.9524039360882352712e-276, -2.9779654517181712829e-292, -3.000817432603284506e-308, 2.0856914288039227544e-259, -1.9524039360882352712e-276, -2.9779654517181712829e-292, -3.000817432603284506e-308, 7.8491179384773690214e-260, -1.9524039360882352712e-276, -2.9779654517181712829e-292, -3.000817432603284506e-308, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, 1.345219763696439399e-260, 1.6579848156414234801e-276, 1.0303712682997738281e-292, 1.4493302844111182601e-308, 5.3223249184882342185e-261, -1.472095602234059958e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 1.2573885592501529789e-261, 3.0408903374280139822e-277, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 2.4115446944063306384e-262, 2.202741251392177696e-278, 2.8287088295287585094e-294, -1.0874435234232647519e-310, 1.1412520821444306741e-262, -6.1787496089661820348e-279, -3.028042329852615431e-295, -2.182740474438892116e-311, 5.0610577601348040988e-263, 7.9243314524777990283e-279, -3.028042329852615431e-295, -2.182740474438892116e-311, 1.8853262294800541881e-263, 8.7279092175580810531e-280, 8.8634899828990930877e-296, -9.8167844904532653004e-314, 2.9746046415267896827e-264, 
-8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 2.9746046415267896827e-264, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 9.8977243486757054781e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 9.8977243486757054781e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 4.9356438320276576408e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 2.4546035737036337221e-265, -8.6516445844406224413e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 1.2140834445416214873e-265, 1.8893435613692150014e-281, 3.0075895258731974416e-297, -9.8167844904532653004e-314, 5.9382337996061564537e-266, 5.1208955146257653156e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 2.8369334767011265554e-266, 5.1208955146257653156e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 1.2862833152486119506e-266, 1.6777604898591683764e-282, -5.0528699238150265939e-299, -1.3288013265921760399e-314, 5.1095823452235464813e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 1.2329569415922591084e-267, -4.3807022524130141006e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 2.638005906844371576e-268, 6.3790946999826013345e-284, -2.7456019707854725967e-300, -2.5539572388808429997e-317, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 
5.7350888195772519812e-317, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, 2.1511502957481757317e-269, 3.2670891426006735363e-285, 2.4084160842482773317e-301, 5.7350888195772519812e-317, 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 3.6369654387311681856e-319, 6.3684349745470443788e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 3.6369654387311681856e-319, 2.5826679788133653036e-270, -9.5347405022956030541e-287, -1.5805886663557401565e-302, 3.6369654387311681856e-319, 6.8978448094652555593e-271, 1.1480487920352081009e-286, 7.5257037990230704094e-303, 3.6369654387311681856e-319, 6.8978448094652555593e-271, 1.1480487920352081009e-286, 7.5257037990230704094e-303, 3.6369654387311681856e-319, 2.1656360647981577662e-271, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, 2.1656360647981577662e-271, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, 9.825838786313830552e-272, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, 3.9105778554799569972e-272, 9.7287370902823839435e-288, 1.6928061833779524157e-303, 3.6369654387311681856e-319, 9.5294739006302120482e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, 9.5294739006302120482e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, 2.1353977370878701046e-273, -1.2215123283371736879e-289, 6.7342163555358599277e-306, -5.681754927174335258e-322, 2.8687869620228451614e-274, -1.9537812801257956865e-290, 1.0380272777574237546e-306, 6.4228533959362050743e-323, }; NOEXPORT ALIGNED(64) const float rempitabsp[] = { 0.1591549367, 6.420638243e-09, 7.342738699e-17, 1.518506657e-24, 0.03415494412, -1.029942243e-09, -3.759491547e-17, 
1.518506657e-24, 0.03415494412, -1.029942243e-09, -3.759491547e-17, 1.518506657e-24, 0.002904943191, -9.861970268e-11, -2.900444505e-18, 7.09405479e-26, 0.002904943191, -9.861970268e-11, -2.900444505e-18, 7.09405479e-26, 0.002904943191, -9.861970268e-11, -2.900444505e-18, 7.09405479e-26, 0.002904943191, -9.861970268e-11, -2.900444505e-18, 7.09405479e-26, 0.0009518180741, 1.779561568e-11, 5.69002499e-19, 1.92417627e-26, 0.0009518180741, 1.779561568e-11, 5.69002499e-19, 1.92417627e-26, 0.0004635368532, -1.130821391e-11, -2.983592131e-19, -6.607632216e-27, 0.0002193962137, 3.243701098e-12, -8.151878508e-20, -1.452834522e-28, 9.732590115e-05, 3.243701098e-12, -8.151878508e-20, -1.452834522e-28, 3.629074854e-05, -3.942777908e-13, -2.036222915e-22, 6.177847236e-30, 5.77316996e-06, 6.046956013e-14, -2.036222915e-22, 6.177847236e-30, 5.77316996e-06, 6.046956013e-14, -2.036222915e-22, 6.177847236e-30, 5.77316996e-06, 6.046956013e-14, -2.036222915e-22, 6.177847236e-30, 1.958472694e-06, 6.046956013e-14, -2.036222915e-22, 6.177847236e-30, 5.112412182e-08, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 5.112412182e-08, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 5.112412182e-08, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 5.112412182e-08, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 5.112412182e-08, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 5.112412182e-08, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 2.132179944e-08, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 6.420638243e-09, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 6.420638243e-09, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 2.695347945e-09, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 8.327028511e-10, 1.791623741e-17, -1.358546052e-25, 2.545416018e-33, 8.327028511e-10, 1.791623741e-17, -1.358546052e-25, 2.545416018e-33, 3.670415916e-10, -9.839338202e-18, -1.358546052e-25, 2.545416018e-33, 1.34210934e-10, 4.038449606e-18, -1.358546052e-25, 
2.545416018e-33, 1.779561568e-11, 5.69002499e-19, 1.92417627e-26, -5.360718009e-34, 1.779561568e-11, 5.69002499e-19, 1.92417627e-26, -5.360718009e-34, 1.779561568e-11, 5.69002499e-19, 1.92417627e-26, -5.360718009e-34, 3.243701098e-12, -8.151878508e-20, -1.452834522e-28, 5.595982685e-36, 3.243701098e-12, -8.151878508e-20, -1.452834522e-28, 5.595982685e-36, 3.243701098e-12, -8.151878508e-20, -1.452834522e-28, 5.595982685e-36, 1.424711586e-12, 2.690143217e-20, -1.452834522e-28, 5.595982685e-36, 5.152168839e-13, 2.690143217e-20, -1.452834522e-28, 5.595982685e-36, 6.046956013e-14, -2.036222915e-22, 6.177847236e-30, -4.639017063e-38, 6.046956013e-14, -2.036222915e-22, 6.177847236e-30, -4.639017063e-38, 6.046956013e-14, -2.036222915e-22, 6.177847236e-30, -4.639017063e-38, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, -4.639017063e-38, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, -4.639017063e-38, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, -4.639017063e-38, 3.626141271e-15, -2.036222915e-22, 6.177847236e-30, -4.639017063e-38, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 6.296048013e-40, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 6.296048013e-40, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 6.296048013e-40, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 6.296048013e-40, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 6.296048013e-40, 7.342738699e-17, 1.518506657e-24, -3.443243946e-32, 6.296048013e-40, 1.791623741e-17, -1.358546052e-25, 2.545416018e-33, 7.859182437e-41, 1.791623741e-17, -1.358546052e-25, 2.545416018e-33, 7.859182437e-41, 4.038449606e-18, -1.358546052e-25, 2.545416018e-33, 7.859182437e-41, 4.038449606e-18, -1.358546052e-25, 2.545416018e-33, 7.859182437e-41, 5.69002499e-19, 1.92417627e-26, -5.360718009e-34, -1.324367179e-41, 5.69002499e-19, 1.92417627e-26, -5.360718009e-34, -1.324367179e-41, 5.69002499e-19, 1.92417627e-26, -5.360718009e-34, -1.324367179e-41, 1.35321643e-19, 6.31706524e-27, -1.508858235e-34, 
-1.764234767e-42, 1.35321643e-19, 6.31706524e-27, -1.508858235e-34, -1.764234767e-42, 2.690143217e-20, -1.452834522e-28, 5.595982685e-36, 2.942726775e-44, 2.690143217e-20, -1.452834522e-28, 5.595982685e-36, 2.942726775e-44, 2.690143217e-20, -1.452834522e-28, 5.595982685e-36, 2.942726775e-44, 1.334890502e-20, -1.452834522e-28, 5.595982685e-36, 2.942726775e-44, 6.572641438e-21, -1.452834522e-28, 5.595982685e-36, 2.942726775e-44, 0.05874383077, 1.045283748e-09, -7.794641199e-18, -2.927455424e-25, 0.02749383263, -8.173614008e-10, -7.794641199e-18, -2.927455424e-25, 0.0118688317, 1.139611669e-10, -8.557476052e-19, 1.744719042e-26, 0.004056331702, 1.139611669e-10, -8.557476052e-19, 1.744719042e-26, 0.000150081818, -2.454155802e-12, 1.161415056e-20, -3.242678858e-28, 0.000150081818, -2.454155802e-12, 1.161415056e-20, -3.242678858e-28, 0.000150081818, -2.454155802e-12, 1.161415056e-20, -3.242678858e-28, 0.000150081818, -2.454155802e-12, 1.161415056e-20, -3.242678858e-28, 0.000150081818, -2.454155802e-12, 1.161415056e-20, -3.242678858e-28, 2.801150367e-05, -6.351663986e-13, 1.161415056e-20, -3.242678858e-28, 2.801150367e-05, -6.351663986e-13, 1.161415056e-20, -3.242678858e-28, 2.801150367e-05, -6.351663986e-13, 1.161415056e-20, -3.242678858e-28, 1.27527137e-05, 2.743283031e-13, 1.161415056e-20, -3.242678858e-28, 5.123319625e-06, -1.804190342e-13, -1.938377e-21, 7.962890365e-29, 1.308622132e-06, 4.695463785e-14, 1.44975489e-21, -2.134529523e-29, 1.308622132e-06, 4.695463785e-14, 1.44975489e-21, -2.134529523e-29, 3.54947872e-07, -9.888779316e-15, -2.443110301e-22, 3.898253736e-30, 3.54947872e-07, -9.888779316e-15, -2.443110301e-22, 3.898253736e-30, 1.165292858e-07, -2.783352171e-15, -3.255279017e-23, 7.428101617e-31, 1.165292858e-07, -2.783352171e-15, -3.255279017e-23, 7.428101617e-31, 5.692463745e-08, 7.693614554e-16, 2.038676903e-23, 7.428101617e-31, 2.712231506e-08, 7.693614554e-16, 2.038676903e-23, 7.428101617e-31, 1.222115475e-08, -1.188169379e-16, -6.083009781e-24, 
-4.605074351e-32, 4.770574158e-09, -1.188169379e-16, -6.083009781e-24, -4.605074351e-32, 1.045283748e-09, -7.794641199e-18, -2.927455424e-25, 3.253064536e-33, 1.045283748e-09, -7.794641199e-18, -2.927455424e-25, 3.253064536e-33, 1.139611669e-10, -8.557476052e-19, 1.744719042e-26, 1.715766248e-34, 1.139611669e-10, -8.557476052e-19, 1.744719042e-26, 1.715766248e-34, 1.139611669e-10, -8.557476052e-19, 1.744719042e-26, 1.715766248e-34, 1.139611669e-10, -8.557476052e-19, 1.744719042e-26, 1.715766248e-34, 5.575350598e-11, -8.557476052e-19, 1.744719042e-26, 1.715766248e-34, 2.664967552e-11, -8.557476052e-19, 1.744719042e-26, 1.715766248e-34, 1.209775943e-11, 1.161415056e-20, -3.242678858e-28, 3.057750017e-36, 4.821801812e-12, 1.161415056e-20, -3.242678858e-28, 3.057750017e-36, 1.183823005e-12, 1.161415056e-20, -3.242678858e-28, 3.057750017e-36, 1.183823005e-12, 1.161415056e-20, -3.242678858e-28, 3.057750017e-36, 2.743283031e-13, 1.161415056e-20, -3.242678858e-28, 3.057750017e-36, }; sleef-3.3.1/src/libm/rename.h000066400000000000000000000110731333715643700157640ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #define xsin Sleef_sin_u35 #define xcos Sleef_cos_u35 #define xsincos Sleef_sincos_u35 #define xtan Sleef_tan_u35 #define xasin Sleef_asin_u35 #define xacos Sleef_acos_u35 #define xatan Sleef_atan_u35 #define xatan2 Sleef_atan2_u35 #define xlog Sleef_log_u35 #define xcbrt Sleef_cbrt_u35 #define xsin_u1 Sleef_sin_u10 #define xcos_u1 Sleef_cos_u10 #define xsincos_u1 Sleef_sincos_u10 #define xtan_u1 Sleef_tan_u10 #define xasin_u1 Sleef_asin_u10 #define xacos_u1 Sleef_acos_u10 #define xatan_u1 Sleef_atan_u10 #define xatan2_u1 Sleef_atan2_u10 #define xlog_u1 Sleef_log_u10 #define xcbrt_u1 Sleef_cbrt_u10 #define xexp Sleef_exp_u10 #define xpow Sleef_pow_u10 #define xsinh Sleef_sinh_u10 #define xcosh Sleef_cosh_u10 #define xtanh Sleef_tanh_u10 #define xsinh_u35 Sleef_sinh_u35 #define xcosh_u35 Sleef_cosh_u35 #define xtanh_u35 Sleef_tanh_u35 #define xasinh Sleef_asinh_u10 #define xacosh Sleef_acosh_u10 #define xatanh Sleef_atanh_u10 #define xexp2 Sleef_exp2_u10 #define xexp10 Sleef_exp10_u10 #define xexpm1 Sleef_expm1_u10 #define xlog10 Sleef_log10_u10 #define xlog2 Sleef_log2_u10 #define xlog1p Sleef_log1p_u10 #define xsincospi_u05 Sleef_sincospi_u05 #define xsincospi_u35 Sleef_sincospi_u35 #define xsinpi_u05 Sleef_sinpi_u05 #define xcospi_u05 Sleef_cospi_u05 #define xldexp Sleef_ldexp #define xilogb Sleef_ilogb #define xfma Sleef_fma #define xsqrt Sleef_sqrt #define xsqrt_u05 Sleef_sqrt_u05 #define xsqrt_u35 Sleef_sqrt_u35 #define xhypot_u05 Sleef_hypot_u05 #define xhypot_u35 Sleef_hypot_u35 #define xfabs Sleef_fabs #define xcopysign Sleef_copysign #define xfmax Sleef_fmax #define xfmin Sleef_fmin #define xfdim Sleef_fdim #define xtrunc Sleef_trunc #define xfloor Sleef_floor #define xceil Sleef_ceil #define xround Sleef_round #define xrint Sleef_rint #define xnextafter Sleef_nextafter #define xfrfrexp Sleef_frfrexp #define xexpfrexp Sleef_expfrexp #define xfmod Sleef_fmod #define 
xmodf Sleef_modf #define xlgamma_u1 Sleef_lgamma_u10 #define xtgamma_u1 Sleef_tgamma_u10 #define xerf_u1 Sleef_erf_u10 #define xerfc_u15 Sleef_erfc_u15 // #define xsinf Sleef_sinf_u35 #define xcosf Sleef_cosf_u35 #define xsincosf Sleef_sincosf_u35 #define xtanf Sleef_tanf_u35 #define xasinf Sleef_asinf_u35 #define xacosf Sleef_acosf_u35 #define xatanf Sleef_atanf_u35 #define xatan2f Sleef_atan2f_u35 #define xlogf Sleef_logf_u35 #define xcbrtf Sleef_cbrtf_u35 #define xsinf_u1 Sleef_sinf_u10 #define xcosf_u1 Sleef_cosf_u10 #define xsincosf_u1 Sleef_sincosf_u10 #define xtanf_u1 Sleef_tanf_u10 #define xasinf_u1 Sleef_asinf_u10 #define xacosf_u1 Sleef_acosf_u10 #define xatanf_u1 Sleef_atanf_u10 #define xatan2f_u1 Sleef_atan2f_u10 #define xlogf_u1 Sleef_logf_u10 #define xcbrtf_u1 Sleef_cbrtf_u10 #define xexpf Sleef_expf_u10 #define xpowf Sleef_powf_u10 #define xsinhf Sleef_sinhf_u10 #define xcoshf Sleef_coshf_u10 #define xtanhf Sleef_tanhf_u10 #define xsinhf_u35 Sleef_sinhf_u35 #define xcoshf_u35 Sleef_coshf_u35 #define xtanhf_u35 Sleef_tanhf_u35 #define xasinhf Sleef_asinhf_u10 #define xacoshf Sleef_acoshf_u10 #define xatanhf Sleef_atanhf_u10 #define xexp2f Sleef_exp2f_u10 #define xexp10f Sleef_exp10f_u10 #define xexpm1f Sleef_expm1f_u10 #define xlog10f Sleef_log10f_u10 #define xlog2f Sleef_log2f_u10 #define xlog1pf Sleef_log1pf_u10 #define xsincospif_u05 Sleef_sincospif_u05 #define xsincospif_u35 Sleef_sincospif_u35 #define xsinpif_u05 Sleef_sinpif_u05 #define xcospif_u05 Sleef_cospif_u05 #define xldexpf Sleef_ldexpf #define xilogbf Sleef_ilogbf #define xfmaf Sleef_fmaf #define xsqrtf Sleef_sqrtf #define xsqrtf_u05 Sleef_sqrtf_u05 #define xsqrtf_u35 Sleef_sqrtf_u35 #define xhypotf_u05 Sleef_hypotf_u05 #define xhypotf_u35 Sleef_hypotf_u35 #define xfabsf Sleef_fabsf #define xcopysignf Sleef_copysignf #define xfmaxf Sleef_fmaxf #define xfminf Sleef_fminf #define xfdimf Sleef_fdimf #define xtruncf Sleef_truncf #define xfloorf Sleef_floorf #define xceilf Sleef_ceilf #define 
xroundf Sleef_roundf #define xrintf Sleef_rintf #define xnextafterf Sleef_nextafterf #define xfrfrexpf Sleef_frfrexpf #define xexpfrexpf Sleef_expfrexpf #define xfmodf Sleef_fmodf #define xmodff Sleef_modff #define xlgammaf_u1 Sleef_lgammaf_u10 #define xtgammaf_u1 Sleef_tgammaf_u10 #define xerff_u1 Sleef_erff_u10 #define xerfcf_u15 Sleef_erfcf_u15 // #define xsincospil_u05 Sleef_sincospil_u05 #define xsincospil_u35 Sleef_sincospil_u35 #define xsincospiq_u05 Sleef_sincospiq_u05 #define xsincospiq_u35 Sleef_sincospiq_u35 sleef-3.3.1/src/libm/sleefdp.c000066400000000000000000002306751333715643700161450ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #include #include #include #include #include #ifndef ENABLE_BUILTIN_MATH #include #define SQRT sqrt #else #define SQRT __builtin_sqrt #endif #include "misc.h" extern const double rempitabdp[]; #ifdef DORENAME #include "rename.h" #endif #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif static INLINE CONST int64_t doubleToRawLongBits(double d) { union { double f; int64_t i; } tmp; tmp.f = d; return tmp.i; } static INLINE CONST double longBitsToDouble(int64_t i) { union { double f; int64_t i; } tmp; tmp.i = i; return tmp.f; } static INLINE CONST double fabsk(double x) { return longBitsToDouble(0x7fffffffffffffffLL & doubleToRawLongBits(x)); } static INLINE CONST double mulsign(double x, double y) { return longBitsToDouble(doubleToRawLongBits(x) ^ (doubleToRawLongBits(y) & (1LL << 63))); } static INLINE CONST double copysignk(double x, double y) { return longBitsToDouble((doubleToRawLongBits(x) & ~(1LL << 63)) ^ (doubleToRawLongBits(y) & (1LL << 63))); } static INLINE CONST double sign(double d) { return mulsign(1, d); } static INLINE CONST double mla(double x, double y, double z) { return x * y 
+ z; } static INLINE CONST double rintk(double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); } static INLINE CONST int ceilk(double x) { return (int)x + (x < 0 ? 0 : 1); } static INLINE CONST double trunck(double x) { return (double)(int)x; } static INLINE CONST double fmink(double x, double y) { return x < y ? x : y; } static INLINE CONST double fmaxk(double x, double y) { return x > y ? x : y; } static INLINE CONST int xisnan(double x) { return x != x; } static INLINE CONST int xisinf(double x) { return x == SLEEF_INFINITY || x == -SLEEF_INFINITY; } static INLINE CONST int xisminf(double x) { return x == -SLEEF_INFINITY; } static INLINE CONST int xispinf(double x) { return x == SLEEF_INFINITY; } static INLINE CONST int xisnegzero(double x) { return doubleToRawLongBits(x) == doubleToRawLongBits(-0.0); } static INLINE CONST int xisnumber(double x) { return !xisinf(x) && !xisnan(x); } static INLINE CONST int xisint(double d) { double x = d - (double)(1LL << 31) * (int)(d * (1.0 / (1LL << 31))); return (x == (int)x) || (fabsk(d) >= (double)(1LL << 53)); } static INLINE CONST int xisodd(double d) { double x = d - (double)(1LL << 31) * (int)(d * (1.0 / (1LL << 31))); return (1 & (int)x) != 0 && fabsk(d) < (double)(1LL << 53); } static INLINE CONST double pow2i(int q) { return longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); } static INLINE CONST double ldexpk(double x, int q) { double u; int m; m = q >> 31; m = (((m + q) >> 9) - m) << 7; q = q - (m << 2); m += 0x3ff; m = m < 0 ? 0 : m; m = m > 0x7ff ? 
0x7ff : m; u = longBitsToDouble(((int64_t)m) << 52); x = x * u * u * u * u; u = longBitsToDouble(((int64_t)(q + 0x3ff)) << 52); return x * u; } static INLINE CONST double ldexp2k(double d, int e) { // faster than ldexpk, short reach return d * pow2i(e >> 1) * pow2i(e - (e >> 1)); } static INLINE CONST double ldexp3k(double d, int e) { // very fast, no denormal return longBitsToDouble(doubleToRawLongBits(d) + (((int64_t)e) << 52)); } EXPORT CONST double xldexp(double x, int exp) { if (exp > 2100) exp = 2100; if (exp < -2100) exp = -2100; int e0 = exp >> 2; if (exp < 0) e0++; if (-100 < exp && exp < 100) e0 = 0; int e1 = exp - (e0 << 2); double p = pow2i(e0); double ret = x * pow2i(e1) * p * p * p * p; return ret; } static INLINE CONST int ilogbk(double d) { int m = d < 4.9090934652977266E-91; d = m ? 2.037035976334486E90 * d : d; int q = (doubleToRawLongBits(d) >> 52) & 0x7ff; q = m ? q - (300 + 0x03ff) : q - 0x03ff; return q; } // ilogb2k is similar to ilogbk, but the argument has to be a // normalized FP value. static INLINE CONST int ilogb2k(double d) { return ((doubleToRawLongBits(d) >> 52) & 0x7ff) - 0x3ff; } EXPORT CONST int xilogb(double d) { int e = ilogbk(fabsk(d)); e = d == 0.0 ? SLEEF_FP_ILOGB0 : e; e = xisnan(d) ? SLEEF_FP_ILOGBNAN : e; e = xisinf(d) ? 
INT_MAX : e; return e; } // #ifndef NDEBUG static int checkfp(double x) { if (xisinf(x) || xisnan(x)) return 1; return 0; } #endif static INLINE CONST double upper(double d) { return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffff8000000LL); } static INLINE CONST Sleef_double2 dd(double h, double l) { Sleef_double2 ret; ret.x = h; ret.y = l; return ret; } static INLINE CONST Sleef_double2 ddnormalize_d2_d2(Sleef_double2 t) { Sleef_double2 s; s.x = t.x + t.y; s.y = t.x - s.x + t.y; return s; } static INLINE CONST Sleef_double2 ddscale_d2_d2_d(Sleef_double2 d, double s) { Sleef_double2 r; r.x = d.x * s; r.y = d.y * s; return r; } static INLINE CONST Sleef_double2 ddneg_d2_d2(Sleef_double2 d) { Sleef_double2 r; r.x = -d.x; r.y = -d.y; return r; } static INLINE CONST Sleef_double2 ddabs_d2_d2(Sleef_double2 x) { return dd(x.x < 0 ? -x.x : x.x, x.x < 0 ? -x.y : x.y); } /* * ddadd and ddadd2 are functions for double-double addition. ddadd * is simpler and faster than ddadd2, but it requires the absolute * value of first argument to be larger than the second argument. The * exact condition that should be met is checked if NDEBUG macro is * not defined. * * Please note that if the results won't be used, it is no problem to * feed arguments that do not meet this condition. You will see * warning messages if you turn off NDEBUG macro and run tester2, but * this is normal. * * Please see : * Jonathan Richard Shewchuk, Adaptive Precision Floating-Point * Arithmetic and Fast Robust Geometric Predicates, Discrete & * Computational Geometry 18:305-363, 1997. 
*/ static INLINE CONST Sleef_double2 ddadd_d2_d_d(double x, double y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y) || fabsk(x) >= fabsk(y) || (fabsk(x+y) <= fabsk(x) && fabsk(x+y) <= fabsk(y)))) { fprintf(stderr, "[ddadd_d2_d_d : %g, %g]\n", x, y); fflush(stderr); } #endif r.x = x + y; r.y = x - r.x + y; return r; } static INLINE CONST Sleef_double2 ddadd2_d2_d_d(double x, double y) { Sleef_double2 r; r.x = x + y; double v = r.x - x; r.y = (x - (r.x - v)) + (y - v); return r; } static INLINE CONST Sleef_double2 ddadd_d2_d2_d(Sleef_double2 x, double y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y) || fabsk(x.x) >= fabsk(y) || (fabsk(x.x+y) <= fabsk(x.x) && fabsk(x.x+y) <= fabsk(y)))) { fprintf(stderr, "[ddadd_d2_d2_d : %g %g]\n", x.x, y); fflush(stderr); } #endif r.x = x.x + y; r.y = x.x - r.x + y + x.y; return r; } static INLINE CONST Sleef_double2 ddadd2_d2_d2_d(Sleef_double2 x, double y) { Sleef_double2 r; r.x = x.x + y; double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y - v); r.y += x.y; return r; } static INLINE CONST Sleef_double2 ddadd_d2_d_d2(double x, Sleef_double2 y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y.x) || fabsk(x) >= fabsk(y.x) || (fabsk(x+y.x) <= fabsk(x) && fabsk(x+y.x) <= fabsk(y.x)))) { fprintf(stderr, "[ddadd_d2_d_d2 : %g %g]\n", x, y.x); fflush(stderr); } #endif r.x = x + y.x; r.y = x - r.x + y.x + y.y; return r; } static INLINE CONST Sleef_double2 ddadd2_d2_d_d2(double x, Sleef_double2 y) { Sleef_double2 r; r.x = x + y.x; double v = r.x - x; r.y = (x - (r.x - v)) + (y.x - v) + y.y; return r; } static INLINE CONST double ddadd2_d_d_d2(double x, Sleef_double2 y) { return y.y + y.x + x; } static INLINE CONST Sleef_double2 ddadd_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || fabsk(x.x) >= fabsk(y.x) || (fabsk(x.x+y.x) <= fabsk(x.x) && fabsk(x.x+y.x) <= 
fabsk(y.x)))) { fprintf(stderr, "[ddadd_d2_d2_d2 : %g %g]\n", x.x, y.x); fflush(stderr); } #endif r.x = x.x + y.x; r.y = x.x - r.x + y.x + x.y + y.y; return r; } static INLINE CONST Sleef_double2 ddadd2_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { Sleef_double2 r; r.x = x.x + y.x; double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y.x - v); r.y += x.y + y.y; return r; } static INLINE CONST Sleef_double2 ddsub_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { // |x| >= |y| Sleef_double2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || fabsk(x.x) >= fabsk(y.x) || (fabsk(x.x-y.x) <= fabsk(x.x) && fabsk(x.x-y.x) <= fabsk(y.x)))) { fprintf(stderr, "[ddsub_d2_d2_d2 : %g %g]\n", x.x, y.x); fflush(stderr); } #endif r.x = x.x - y.x; r.y = x.x - r.x - y.x + x.y - y.y; return r; } static INLINE CONST Sleef_double2 dddiv_d2_d2_d2(Sleef_double2 n, Sleef_double2 d) { double t = 1.0 / d.x; double dh = upper(d.x), dl = d.x - dh; double th = upper(t ), tl = t - th; double nhh = upper(n.x), nhl = n.x - nhh; Sleef_double2 q; q.x = n.x * t; double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); q.y = t * (n.y - q.x * d.y) + u; return q; } static INLINE CONST Sleef_double2 ddmul_d2_d_d(double x, double y) { double xh = upper(x), xl = x - xh; double yh = upper(y), yl = y - yh; Sleef_double2 r; r.x = x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; return r; } static INLINE CONST Sleef_double2 ddmul_d2_d2_d(Sleef_double2 x, double y) { double xh = upper(x.x), xl = x.x - xh; double yh = upper(y ), yl = y - yh; Sleef_double2 r; r.x = x.x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; return r; } static INLINE CONST Sleef_double2 ddmul_d2_d2_d2(Sleef_double2 x, Sleef_double2 y) { double xh = upper(x.x), xl = x.x - xh; double yh = upper(y.x), yl = y.x - yh; Sleef_double2 r; r.x = x.x * y.x; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; return r; } static INLINE CONST double 
ddmul_d_d2_d2(Sleef_double2 x, Sleef_double2 y) { double xh = upper(x.x), xl = x.x - xh; double yh = upper(y.x), yl = y.x - yh; return x.y * yh + xh * y.y + xl * yl + xh * yl + xl * yh + xh * yh; } static INLINE CONST Sleef_double2 ddsqu_d2_d2(Sleef_double2 x) { double xh = upper(x.x), xl = x.x - xh; Sleef_double2 r; r.x = x.x * x.x; r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); return r; } static INLINE CONST double ddsqu_d_d2(Sleef_double2 x) { double xh = upper(x.x), xl = x.x - xh; return xh * x.y + xh * x.y + xl * xl + (xh * xl + xh * xl) + xh * xh; } static INLINE CONST Sleef_double2 ddrec_d2_d(double d) { double t = 1.0 / d; double dh = upper(d), dl = d - dh; double th = upper(t), tl = t - th; Sleef_double2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); return q; } static INLINE CONST Sleef_double2 ddrec_d2_d2(Sleef_double2 d) { double t = 1.0 / d.x; double dh = upper(d.x), dl = d.x - dh; double th = upper(t ), tl = t - th; Sleef_double2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); return q; } static INLINE CONST Sleef_double2 ddsqrt_d2_d2(Sleef_double2 d) { double t = SQRT(d.x + d.y); return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d2_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5); } static INLINE CONST Sleef_double2 ddsqrt_d2_d(double d) { double t = SQRT(d); return ddscale_d2_d2_d(ddmul_d2_d2_d2(ddadd2_d2_d_d2(d, ddmul_d2_d_d(t, t)), ddrec_d2_d(t)), 0.5); } // static INLINE CONST double atan2k(double y, double x) { double s, t, u; int q = 0; if (x < 0) { x = -x; q = -2; } if (y > x) { t = x; x = y; y = -t; q += 1; } s = y / x; t = s * s; u = -1.88796008463073496563746e-05; u = mla(u, t, 0.000209850076645816976906797); u = mla(u, t, -0.00110611831486672482563471); u = mla(u, t, 0.00370026744188713119232403); u = mla(u, t, -0.00889896195887655491740809); u = mla(u, t, 0.016599329773529201970117); u = mla(u, t, -0.0254517624932312641616861); u = mla(u, t, 0.0337852580001353069993897); 
u = mla(u, t, -0.0407629191276836500001934); u = mla(u, t, 0.0466667150077840625632675); u = mla(u, t, -0.0523674852303482457616113); u = mla(u, t, 0.0587666392926673580854313); u = mla(u, t, -0.0666573579361080525984562); u = mla(u, t, 0.0769219538311769618355029); u = mla(u, t, -0.090908995008245008229153); u = mla(u, t, 0.111111105648261418443745); u = mla(u, t, -0.14285714266771329383765); u = mla(u, t, 0.199999999996591265594148); u = mla(u, t, -0.333333333333311110369124); t = u * t * s + s; t = q * (M_PI/2) + t; return t; } EXPORT CONST double xatan2(double y, double x) { double r = atan2k(fabsk(y), x); r = mulsign(r, x); if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI /2)) : 0); if (xisinf(y) ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0); if ( y == 0) r = (sign(x) == -1 ? M_PI : 0); return xisnan(x) || xisnan(y) ? SLEEF_NAN : mulsign(r, y); } EXPORT CONST double xasin(double d) { int o = fabsk(d) < 0.5; double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), x = o ? fabsk(d) : SQRT(x2), u; u = +0.3161587650653934628e-1; u = mla(u, x2, -0.1581918243329996643e-1); u = mla(u, x2, +0.1929045477267910674e-1); u = mla(u, x2, +0.6606077476277170610e-2); u = mla(u, x2, +0.1215360525577377331e-1); u = mla(u, x2, +0.1388715184501609218e-1); u = mla(u, x2, +0.1735956991223614604e-1); u = mla(u, x2, +0.2237176181932048341e-1); u = mla(u, x2, +0.3038195928038132237e-1); u = mla(u, x2, +0.4464285681377102438e-1); u = mla(u, x2, +0.7500000000378581611e-1); u = mla(u, x2, +0.1666666666666497543e+0); u = mla(u, x * x2, x); double r = o ? u : (M_PI/2 - 2*u); r = mulsign(r, d); return r; } EXPORT CONST double xacos(double d) { int o = fabsk(d) < 0.5; double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; double x = o ? fabsk(d) : SQRT(x2); x = fabsk(d) == 1.0 ? 
0 : x; u = +0.3161587650653934628e-1; u = mla(u, x2, -0.1581918243329996643e-1); u = mla(u, x2, +0.1929045477267910674e-1); u = mla(u, x2, +0.6606077476277170610e-2); u = mla(u, x2, +0.1215360525577377331e-1); u = mla(u, x2, +0.1388715184501609218e-1); u = mla(u, x2, +0.1735956991223614604e-1); u = mla(u, x2, +0.2237176181932048341e-1); u = mla(u, x2, +0.3038195928038132237e-1); u = mla(u, x2, +0.4464285681377102438e-1); u = mla(u, x2, +0.7500000000378581611e-1); u = mla(u, x2, +0.1666666666666497543e+0); u *= x * x2; double y = 3.1415926535897932/2 - (mulsign(x, d) + mulsign(u, d)); x += u; double r = o ? y : (x*2); if (!o && d < 0) r = ddadd_d2_d2_d(dd(3.141592653589793116, 1.2246467991473532072e-16), -r).x; return r; } EXPORT CONST double xatan(double s) { double t, u; int q = 0; if (sign(s) == -1) { s = -s; q = 2; } if (s > 1) { s = 1.0 / s; q |= 1; } t = s * s; u = -1.88796008463073496563746e-05; u = mla(u, t, 0.000209850076645816976906797); u = mla(u, t, -0.00110611831486672482563471); u = mla(u, t, 0.00370026744188713119232403); u = mla(u, t, -0.00889896195887655491740809); u = mla(u, t, 0.016599329773529201970117); u = mla(u, t, -0.0254517624932312641616861); u = mla(u, t, 0.0337852580001353069993897); u = mla(u, t, -0.0407629191276836500001934); u = mla(u, t, 0.0466667150077840625632675); u = mla(u, t, -0.0523674852303482457616113); u = mla(u, t, 0.0587666392926673580854313); u = mla(u, t, -0.0666573579361080525984562); u = mla(u, t, 0.0769219538311769618355029); u = mla(u, t, -0.090908995008245008229153); u = mla(u, t, 0.111111105648261418443745); u = mla(u, t, -0.14285714266771329383765); u = mla(u, t, 0.199999999996591265594148); u = mla(u, t, -0.333333333333311110369124); t = s + s * (t * u); if ((q & 1) != 0) t = 1.570796326794896557998982 - t; if ((q & 2) != 0) t = -t; return t; } static Sleef_double2 atan2k_u1(Sleef_double2 y, Sleef_double2 x) { double u; Sleef_double2 s, t; int q = 0; if (x.x < 0) { x.x = -x.x; x.y = -x.y; q = -2; } if (y.x > x.x) 
{ t = x; x = y; y.x = -t.x; y.y = -t.y; q += 1; } s = dddiv_d2_d2_d2(y, x); t = ddsqu_d2_d2(s); t = ddnormalize_d2_d2(t); u = 1.06298484191448746607415e-05; u = mla(u, t.x, -0.000125620649967286867384336); u = mla(u, t.x, 0.00070557664296393412389774); u = mla(u, t.x, -0.00251865614498713360352999); u = mla(u, t.x, 0.00646262899036991172313504); u = mla(u, t.x, -0.0128281333663399031014274); u = mla(u, t.x, 0.0208024799924145797902497); u = mla(u, t.x, -0.0289002344784740315686289); u = mla(u, t.x, 0.0359785005035104590853656); u = mla(u, t.x, -0.041848579703592507506027); u = mla(u, t.x, 0.0470843011653283988193763); u = mla(u, t.x, -0.0524914210588448421068719); u = mla(u, t.x, 0.0587946590969581003860434); u = mla(u, t.x, -0.0666620884778795497194182); u = mla(u, t.x, 0.0769225330296203768654095); u = mla(u, t.x, -0.0909090442773387574781907); u = mla(u, t.x, 0.111111108376896236538123); u = mla(u, t.x, -0.142857142756268568062339); u = mla(u, t.x, 0.199999999997977351284817); u = mla(u, t.x, -0.333333333333317605173818); t = ddmul_d2_d2_d(t, u); t = ddmul_d2_d2_d2(s, ddadd_d2_d_d2(1, t)); if (fabsk(s.x) < 1e-200) t = s; t = ddadd2_d2_d2_d2(ddmul_d2_d2_d(dd(1.570796326794896557998982, 6.12323399573676603586882e-17), q), t); return t; } EXPORT CONST double xatan2_u1(double y, double x) { if (fabsk(x) < 5.5626846462680083984e-309) { y *= (1ULL << 53); x *= (1ULL << 53); } // nexttoward((1.0 / DBL_MAX), 1) Sleef_double2 d = atan2k_u1(dd(fabsk(y), 0), dd(x, 0)); double r = d.x + d.y; r = mulsign(r, x); if (xisinf(x) || x == 0) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI /2)) : 0); if (xisinf(y) ) r = M_PI/2 - (xisinf(x) ? (sign(x) * (M_PI*1/4)) : 0); if ( y == 0) r = (sign(x) == -1 ? M_PI : 0); return xisnan(x) || xisnan(y) ? SLEEF_NAN : mulsign(r, y); } EXPORT CONST double xasin_u1(double d) { int o = fabsk(d) < 0.5; double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; Sleef_double2 x = o ? dd(fabsk(d), 0) : ddsqrt_d2_d(x2); x = fabsk(d) == 1.0 ? 
dd(0, 0) : x; u = +0.3161587650653934628e-1; u = mla(u, x2, -0.1581918243329996643e-1); u = mla(u, x2, +0.1929045477267910674e-1); u = mla(u, x2, +0.6606077476277170610e-2); u = mla(u, x2, +0.1215360525577377331e-1); u = mla(u, x2, +0.1388715184501609218e-1); u = mla(u, x2, +0.1735956991223614604e-1); u = mla(u, x2, +0.2237176181932048341e-1); u = mla(u, x2, +0.3038195928038132237e-1); u = mla(u, x2, +0.4464285681377102438e-1); u = mla(u, x2, +0.7500000000378581611e-1); u = mla(u, x2, +0.1666666666666497543e+0); u *= x2 * x.x; Sleef_double2 y = ddadd_d2_d2_d(ddsub_d2_d2_d2(dd(3.141592653589793116/4, 1.2246467991473532072e-16/4), x), -u); double r = o ? (u + x.x) : ((y.x + y.y)*2); r = mulsign(r, d); return r; } EXPORT CONST double xacos_u1(double d) { int o = fabsk(d) < 0.5; double x2 = o ? (d*d) : ((1-fabsk(d))*0.5), u; Sleef_double2 x = o ? dd(fabsk(d), 0) : ddsqrt_d2_d(x2), w; x = fabsk(d) == 1.0 ? dd(0, 0) : x; u = +0.3161587650653934628e-1; u = mla(u, x2, -0.1581918243329996643e-1); u = mla(u, x2, +0.1929045477267910674e-1); u = mla(u, x2, +0.6606077476277170610e-2); u = mla(u, x2, +0.1215360525577377331e-1); u = mla(u, x2, +0.1388715184501609218e-1); u = mla(u, x2, +0.1735956991223614604e-1); u = mla(u, x2, +0.2237176181932048341e-1); u = mla(u, x2, +0.3038195928038132237e-1); u = mla(u, x2, +0.4464285681377102438e-1); u = mla(u, x2, +0.7500000000378581611e-1); u = mla(u, x2, +0.1666666666666497543e+0); u *= x.x * x2; Sleef_double2 y = ddsub_d2_d2_d2(dd(3.141592653589793116/2, 1.2246467991473532072e-16/2), ddadd_d2_d_d(mulsign(x.x, d), mulsign(u, d))); x = ddadd_d2_d2_d(x, u); y = o ? 
y : ddscale_d2_d2_d(x, 2); if (!o && d < 0) y = ddsub_d2_d2_d2(dd(3.141592653589793116, 1.2246467991473532072e-16), y); return y.x + y.y; } EXPORT CONST double xatan_u1(double d) { Sleef_double2 d2 = atan2k_u1(dd(fabsk(d), 0), dd(1, 0)); double r = d2.x + d2.y; if (xisinf(d)) r = 1.570796326794896557998982; return mulsign(r, d); } typedef struct { double d; int32_t i; } di_t; typedef struct { Sleef_double2 dd; int32_t i; } ddi_t; static CONST di_t rempisub(double x) { // This function is equivalent to : // di_t ret = { x - round(4 * x) * 0.25, (int32_t)(round(4 * x) - round(x) * 4) }; di_t ret; double fr = x - (double)(1LL << 28) * (int32_t)(x * (1.0 / (1LL << 28))); ret.i = ((7 & ((x > 0 ? 4 : 3) + (int32_t)(fr * 8))) - 3) >> 1; fr = fr - 0.25 * (int32_t)(fr * 4 + mulsign(0.5, x)); fr = fabsk(fr) > 0.25 ? (fr - mulsign(0.5, x)) : fr; fr = fabsk(fr) > 1e+10 ? 0 : fr; if (fabsk(x) == 0.12499999999999998612) { fr = x; ret.i = 0; } ret.d = fr; return ret; } // Payne-Hanek like argument reduction static CONST ddi_t rempi(double a) { Sleef_double2 x, y, z; di_t di; double t; int ex = ilogb2k(a) - 55, q = ex > (700-55) ? -64 : 0; a = ldexp3k(a, q); if (ex < 0) ex = 0; ex *= 4; x = ddmul_d2_d_d(a, rempitabdp[ex]); di = rempisub(x.x); q = di.i; x.x = di.d; x = ddnormalize_d2_d2(x); y = ddmul_d2_d_d(a, rempitabdp[ex+1]); x = ddadd2_d2_d2_d2(x, y); di = rempisub(x.x); q += di.i; x.x = di.d; x = ddnormalize_d2_d2(x); y = ddmul_d2_d2_d(dd(rempitabdp[ex+2], rempitabdp[ex+3]), a); x = ddadd2_d2_d2_d2(x, y); x = ddnormalize_d2_d2(x); x = ddmul_d2_d2_d2(x, dd(3.141592653589793116*2, 1.2246467991473532072e-16*2)); ddi_t ret = { fabsk(a) < 0.7 ? 
dd(a, 0) : x, q }; return ret; } EXPORT CONST double xsin(double d) { double u, s, t = d; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(d * M_1_PI); d = mla(ql, -PI_A2, d); d = mla(ql, -PI_B2, d); } else if (fabsk(d) < TRIGRANGEMAX) { double dqh = trunck(d * (M_1_PI / (1 << 24))) * (double)(1 << 24); ql = rintk(mla(d, M_1_PI, -dqh)); d = mla(dqh, -PI_A, d); d = mla( ql, -PI_A, d); d = mla(dqh, -PI_B, d); d = mla( ql, -PI_B, d); d = mla(dqh, -PI_C, d); d = mla( ql, -PI_C, d); d = mla(dqh + ql, -PI_D, d); } else { ddi_t ddi = rempi(t); ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 1) >> 2; if ((ddi.i & 1) != 0) { ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x), mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x))); } d = ddi.dd.x + ddi.dd.y; if (xisinf(t) || xisnan(t)) d = SLEEF_NAN; } s = d * d; if ((ql & 1) != 0) d = -d; u = -7.97255955009037868891952e-18; u = mla(u, s, 2.81009972710863200091251e-15); u = mla(u, s, -7.64712219118158833288484e-13); u = mla(u, s, 1.60590430605664501629054e-10); u = mla(u, s, -2.50521083763502045810755e-08); u = mla(u, s, 2.75573192239198747630416e-06); u = mla(u, s, -0.000198412698412696162806809); u = mla(u, s, 0.00833333333333332974823815); u = mla(u, s, -0.166666666666666657414808); u = mla(s, u * d, d); if (xisnegzero(t)) u = t; return u; } EXPORT CONST double xsin_u1(double d) { double u; Sleef_double2 s, t, x; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(d * M_1_PI); u = mla(ql, -PI_A2, d); s = ddadd_d2_d_d (u, ql * -PI_B2); } else if (fabsk(d) < TRIGRANGEMAX) { const double dqh = trunck(d * (M_1_PI / (1 << 24))) * (double)(1 << 24); ql = rintk(mla(d, M_1_PI, -dqh)); u = mla(dqh, -PI_A, d); s = ddadd_d2_d_d (u, ql * -PI_A); s = ddadd2_d2_d2_d(s, dqh * -PI_B); s = ddadd2_d2_d2_d(s, ql * -PI_B); s = ddadd2_d2_d2_d(s, dqh * -PI_C); s = ddadd2_d2_d2_d(s, ql * -PI_C); s = ddadd_d2_d2_d (s, (dqh + ql) * -PI_D); } else { ddi_t ddi = rempi(d); ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 1) >> 2; if 
((ddi.i & 1) != 0) { ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x), mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x))); } s = ddnormalize_d2_d2(ddi.dd); if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN; } t = s; s = ddsqu_d2_d2(s); u = 2.72052416138529567917983e-15; u = mla(u, s.x, -7.6429259411395447190023e-13); u = mla(u, s.x, 1.60589370117277896211623e-10); u = mla(u, s.x, -2.5052106814843123359368e-08); u = mla(u, s.x, 2.75573192104428224777379e-06); u = mla(u, s.x, -0.000198412698412046454654947); u = mla(u, s.x, 0.00833333333333318056201922); x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s)); u = ddmul_d_d2_d2(t, x); if ((ql & 1) != 0) u = -u; if (xisnegzero(d)) u = d; return u; } EXPORT CONST double xcos(double d) { double u, s, t = d; int ql; if (fabsk(d) < TRIGRANGEMAX2) { ql = mla(2, rintk(d * M_1_PI - 0.5), 1); d = mla(ql, -PI_A2*0.5, d); d = mla(ql, -PI_B2*0.5, d); } else if (fabsk(d) < TRIGRANGEMAX) { double dqh = trunck(d * (M_1_PI / (1LL << 23)) - 0.5 * (M_1_PI / (1LL << 23))); ql = 2*rintk(d * M_1_PI - 0.5 - dqh * (double)(1LL << 23))+1; dqh *= 1 << 24; d = mla(dqh, -PI_A*0.5, d); d = mla( ql, -PI_A*0.5, d); d = mla(dqh, -PI_B*0.5, d); d = mla( ql, -PI_B*0.5, d); d = mla(dqh, -PI_C*0.5, d); d = mla( ql, -PI_C*0.5, d); d = mla(dqh + ql , -PI_D*0.5, d); } else { ddi_t ddi = rempi(t); ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 7) >> 1; if ((ddi.i & 1) == 0) { ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x > 0 ? 1 : -1), mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x > 0 ? 
1 : -1))); } d = ddi.dd.x + ddi.dd.y; if (xisinf(t) || xisnan(t)) d = SLEEF_NAN; } s = d * d; if ((ql & 2) == 0) d = -d; u = -7.97255955009037868891952e-18; u = mla(u, s, 2.81009972710863200091251e-15); u = mla(u, s, -7.64712219118158833288484e-13); u = mla(u, s, 1.60590430605664501629054e-10); u = mla(u, s, -2.50521083763502045810755e-08); u = mla(u, s, 2.75573192239198747630416e-06); u = mla(u, s, -0.000198412698412696162806809); u = mla(u, s, 0.00833333333333332974823815); u = mla(u, s, -0.166666666666666657414808); u = mla(s, u * d, d); return u; } EXPORT CONST double xcos_u1(double d) { double u; Sleef_double2 s, t, x; int ql; d = fabsk(d); if (d < TRIGRANGEMAX2) { ql = mla(2, rintk(d * M_1_PI - 0.5), 1); s = ddadd2_d2_d_d(d, ql * (-PI_A2*0.5)); s = ddadd_d2_d2_d(s, ql * (-PI_B2*0.5)); } else if (d < TRIGRANGEMAX) { double dqh = trunck(d * (M_1_PI / (1LL << 23)) - 0.5 * (M_1_PI / (1LL << 23))); ql = 2*rintk(d * M_1_PI - 0.5 - dqh * (double)(1LL << 23))+1; dqh *= 1 << 24; u = mla(dqh, -PI_A*0.5, d); s = ddadd2_d2_d_d (u, ql * (-PI_A*0.5)); s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5)); s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5)); s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5)); s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5)); s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5)); } else { ddi_t ddi = rempi(d); ql = ((ddi.i & 3) * 2 + (ddi.dd.x > 0) + 7) >> 1; if ((ddi.i & 1) == 0) { ddi.dd = ddadd2_d2_d2_d2(ddi.dd, dd(mulsign(3.141592653589793116*-0.5, ddi.dd.x > 0 ? 1 : -1), mulsign(1.2246467991473532072e-16*-0.5, ddi.dd.x > 0 ? 
1 : -1))); } s = ddnormalize_d2_d2(ddi.dd); if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN; } t = s; s = ddsqu_d2_d2(s); u = 2.72052416138529567917983e-15; u = mla(u, s.x, -7.6429259411395447190023e-13); u = mla(u, s.x, 1.60589370117277896211623e-10); u = mla(u, s.x, -2.5052106814843123359368e-08); u = mla(u, s.x, 2.75573192104428224777379e-06); u = mla(u, s.x, -0.000198412698412046454654947); u = mla(u, s.x, 0.00833333333333318056201922); x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(-0.166666666666666657414808, u * s.x), s)); u = ddmul_d_d2_d2(t, x); if ((((int)ql) & 2) == 0) u = -u; return u; } EXPORT CONST Sleef_double2 xsincos(double d) { double u, s, t; Sleef_double2 r; int ql; s = d; if (fabsk(d) < TRIGRANGEMAX2) { ql = rintk(s * (2 * M_1_PI)); s = mla(ql, -PI_A2*0.5, s); s = mla(ql, -PI_B2*0.5, s); } else if (fabsk(d) < TRIGRANGEMAX) { double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24); ql = rintk(d * (2 * M_1_PI) - dqh); s = mla(dqh, -PI_A * 0.5, s); s = mla( ql, -PI_A * 0.5, s); s = mla(dqh, -PI_B * 0.5, s); s = mla( ql, -PI_B * 0.5, s); s = mla(dqh, -PI_C * 0.5, s); s = mla( ql, -PI_C * 0.5, s); s = mla(dqh + ql, -PI_D * 0.5, s); } else { ddi_t ddi = rempi(d); ql = ddi.i; s = ddi.dd.x + ddi.dd.y; if (xisinf(d) || xisnan(d)) s = SLEEF_NAN; } t = s; s = s * s; u = 1.58938307283228937328511e-10; u = mla(u, s, -2.50506943502539773349318e-08); u = mla(u, s, 2.75573131776846360512547e-06); u = mla(u, s, -0.000198412698278911770864914); u = mla(u, s, 0.0083333333333191845961746); u = mla(u, s, -0.166666666666666130709393); u = u * s * t; r.x = t + u; if (xisnegzero(d)) r.x = -0.0; u = -1.13615350239097429531523e-11; u = mla(u, s, 2.08757471207040055479366e-09); u = mla(u, s, -2.75573144028847567498567e-07); u = mla(u, s, 2.48015872890001867311915e-05); u = mla(u, s, -0.00138888888888714019282329); u = mla(u, s, 0.0416666666666665519592062); u = mla(u, s, -0.5); r.y = u * s + 1; if ((ql & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((ql & 
2) != 0) { r.x = -r.x; }
  if (((ql+1) & 2) != 0) { r.y = -r.y; }

  return r;
}

// xsincos_u1: same overall structure as a two-result sin/cos kernel, but the
// reduced argument is carried as a double-double (Sleef_double2) and the final
// additions are done in double-double before being collapsed to double.
// NOTE(review): r.x is presumably sin(d), r.y cos(d), and _u1 the 1.0-ULP
// variant - confirm against the project documentation.
//
// The reduction is chosen by magnitude:
//   |d| < TRIGRANGEMAX2 : one quotient ql, two half-pi pieces (PI_A2, PI_B2).
//   |d| < TRIGRANGEMAX  : quotient split into dqh (multiple of 2^24) and ql,
//                         four half-pi pieces PI_A..PI_D.
//   otherwise           : rempi(); both words become NaN for inf/NaN input.
EXPORT CONST Sleef_double2 xsincos_u1(double d) {
  double u;
  Sleef_double2 r, s, t, x;
  int ql;

  if (fabsk(d) < TRIGRANGEMAX2) {
    ql = rintk(d * (2 * M_1_PI));
    u = mla(ql, -PI_A2*0.5, d);
    s = ddadd_d2_d_d (u, ql * (-PI_B2*0.5));
  } else if (fabsk(d) < TRIGRANGEMAX) {
    const double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24);
    ql = rintk(d * (2 * M_1_PI) - dqh);
    u = mla(dqh, -PI_A*0.5, d);
    s = ddadd_d2_d_d(u, ql * (-PI_A*0.5));
    s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5));
    s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5));
    s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5));
    s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5));
    s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5));
  } else {
    ddi_t ddi = rempi(d);
    ql = ddi.i;
    s = ddi.dd;
    if (xisinf(d) || xisnan(d)) s = dd(SLEEF_NAN, SLEEF_NAN);
  }

  // t keeps the full double-double argument; only s.x is overwritten with the
  // square, which is all the polynomials below read.
  t = s;

  s.x = ddsqu_d_d2(s);

  // First polynomial; its correction term is added to t in double-double.
  u = 1.58938307283228937328511e-10;
  u = mla(u, s.x, -2.50506943502539773349318e-08);
  u = mla(u, s.x, 2.75573131776846360512547e-06);
  u = mla(u, s.x, -0.000198412698278911770864914);
  u = mla(u, s.x, 0.0083333333333191845961746);
  u = mla(u, s.x, -0.166666666666666130709393);

  u *= s.x * t.x;

  x = ddadd_d2_d2_d(t, u);
  r.x = x.x + x.y;

  // Keep the sign of a negative-zero input.
  if (xisnegzero(d)) r.x = -0.0;

  // Second polynomial; 1 + s.x * Q(s.x) is formed in double-double.
  u = -1.13615350239097429531523e-11;
  u = mla(u, s.x, 2.08757471207040055479366e-09);
  u = mla(u, s.x, -2.75573144028847567498567e-07);
  u = mla(u, s.x, 2.48015872890001867311915e-05);
  u = mla(u, s.x, -0.00138888888888714019282329);
  u = mla(u, s.x, 0.0416666666666665519592062);
  u = mla(u, s.x, -0.5);

  x = ddadd_d2_d_d2(1, ddmul_d2_d_d(s.x, u));
  r.y = x.x + x.y;

  // Quadrant fix-up: swap on odd ql, then negate according to bit 1.
  if ((ql & 1) != 0) { u = r.y; r.y = r.x; r.x = u; }
  if ((ql & 2) != 0) { r.x = -r.x; }
  if (((ql+1) & 2) != 0) { r.y = -r.y; }

  return r;
}

// xsincospi_u05: two-result kernel whose argument is scaled by 4 instead of
// being reduced against pi, i.e. the reduction is exact.
// NOTE(review): presumably returns sin(pi*d) in r.x and cos(pi*d) in r.y with
// 0.5-ULP accuracy - confirm against the project documentation.
//
// q is the even integer obtained from ceil(4*d) with its lowest bit cleared,
// and s = 4*d - q is the remainder the polynomials are evaluated on.
// Special cases handled at the end: |d| > TRIGRANGEMAX3/4 gives (0, 1) and an
// infinite d gives (NaN, NaN).
EXPORT CONST Sleef_double2 xsincospi_u05(double d) {
  double u, s, t;
  Sleef_double2 r, x, s2;

  u = d * 4;
  int q = ceilk(u) & ~(int)1;

  s = u - (double)q;
  t = s;
  s = s * s;
  // s2 holds t*t as a double-double for the high-order terms.
  s2 = ddmul_d2_d_d(t, t);

  // First polynomial; its result is stored in r.x.
  u = -2.02461120785182399295868e-14;
  u = mla(u, s,
6.94821830580179461327784e-12);
  u = mla(u, s, -1.75724749952853179952664e-09);
  u = mla(u, s, 3.13361688966868392878422e-07);
  u = mla(u, s, -3.6576204182161551920361e-05);
  u = mla(u, s, 0.00249039457019271850274356);
  // The two leading coefficients are supplied as double-double constants.
  x = ddadd2_d2_d_d2(u * s, dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18));
  x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), dd(0.785398163397448278999491, 3.06287113727155002607105e-17));

  x = ddmul_d2_d2_d(x, t);
  r.x = x.x + x.y;

  // Keep the sign of a negative-zero input.
  if (xisnegzero(d)) r.x = -0.0;

  // Second polynomial; its result is stored in r.y.
  u = 9.94480387626843774090208e-16;
  u = mla(u, s, -3.89796226062932799164047e-13);
  u = mla(u, s, 1.15011582539996035266901e-10);
  u = mla(u, s, -2.4611369501044697495359e-08);
  u = mla(u, s, 3.59086044859052754005062e-06);
  u = mla(u, s, -0.000325991886927389905997954);
  x = ddadd2_d2_d_d2(u * s, dd(0.0158543442438155018914259, -1.04693272280631521908845e-18));
  x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), dd(-0.308425137534042437259529, -1.95698492133633550338345e-17));

  x = ddadd2_d2_d2_d(ddmul_d2_d2_d2(x, s2), 1);
  r.y = x.x + x.y;

  // Fix-up from q: swap on bit 1, negate according to bit 2.
  if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; }
  if ((q & 4) != 0) { r.x = -r.x; }
  if (((q+2) & 4) != 0) { r.y = -r.y; }

  // Very large arguments return the fixed pair (0, 1); infinity returns NaN.
  if (fabsk(d) > TRIGRANGEMAX3/4) { r.x = 0; r.y = 1; }
  if (xisinf(d)) { r.x = r.y = SLEEF_NAN; }

  return r;
}

// xsincospi_u35: plain-double counterpart of the double-double sincospi
// kernel. The reduction (q = even part of ceil(4*d), s = 4*d - q) and the
// final fix-ups are the same; both polynomials are evaluated entirely in
// double with mla.
// NOTE(review): presumably sin(pi*d) in r.x and cos(pi*d) in r.y with 3.5-ULP
// accuracy - confirm against the project documentation.
EXPORT CONST Sleef_double2 xsincospi_u35(double d) {
  double u, s, t;
  Sleef_double2 r;

  u = d * 4;
  int q = ceilk(u) & ~(int)1;

  s = u - (double)q;
  t = s;
  s = s * s;

  // First polynomial; multiplied by t to give r.x.
  u = +0.6880638894766060136e-11;
  u = mla(u, s, -0.1757159564542310199e-8);
  u = mla(u, s, +0.3133616327257867311e-6);
  u = mla(u, s, -0.3657620416388486452e-4);
  u = mla(u, s, +0.2490394570189932103e-2);
  u = mla(u, s, -0.8074551218828056320e-1);
  u = mla(u, s, +0.7853981633974482790e+0);

  r.x = u * t;

  // Second polynomial; its value becomes r.y.
  u = -0.3860141213683794352e-12;
  u = mla(u, s, +0.1150057888029681415e-9);
  u = mla(u, s, -0.2461136493006663553e-7);
  u = mla(u, s, +0.3590860446623516713e-5);
  u = mla(u, s, -0.3259918869269435942e-3);
  u = mla(u, s, +0.1585434424381541169e-1);
  u = mla(u, s,
-0.3084251375340424373e+0);
  u = mla(u, s, 1);

  r.y = u;

  // Fix-up from q: swap on bit 1, negate according to bit 2.
  if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; }
  if ((q & 4) != 0) { r.x = -r.x; }
  if (((q+2) & 4) != 0) { r.y = -r.y; }

  // Very large arguments return the fixed pair (0, 1); infinity returns NaN.
  if (fabsk(d) > TRIGRANGEMAX3/4) { r.x = 0; r.y = 1; }
  if (xisinf(d)) { r.x = r.y = SLEEF_NAN; }

  return r;
}

// sinpik: double-double kernel used by xsinpi_u05.
//
// q is the even part of ceil(4*d) and s = 4*d - q the remainder. The flag o
// (bit 1 of q set) picks one of two coefficient sets for every term, so a
// single Horner chain serves both cases:
//   o != 0 : result is 1 + s2 * P(s2)
//   o == 0 : result is t * Q(s2)
// Both words of the result are negated when bit 2 of q is set.
static INLINE CONST Sleef_double2 sinpik(double d) {
  double u, s, t;
  Sleef_double2 x, s2;

  u = d * 4;
  int q = ceilk(u) & ~1;
  int o = (q & 2) != 0;

  s = u - (double)q;
  t = s;
  s = s * s;
  s2 = ddmul_d2_d_d(t, t);

  // Horner chain; each coefficient is selected by o.
  u = o ? 9.94480387626843774090208e-16 : -2.02461120785182399295868e-14;
  u = mla(u, s, o ? -3.89796226062932799164047e-13 : 6.94821830580179461327784e-12);
  u = mla(u, s, o ? 1.15011582539996035266901e-10 : -1.75724749952853179952664e-09);
  u = mla(u, s, o ? -2.4611369501044697495359e-08 : 3.13361688966868392878422e-07);
  u = mla(u, s, o ? 3.59086044859052754005062e-06 : -3.6576204182161551920361e-05);
  u = mla(u, s, o ? -0.000325991886927389905997954 : 0.00249039457019271850274356);
  // The two leading coefficients are double-double constants.
  x = ddadd2_d2_d_d2(u * s, o ? dd(0.0158543442438155018914259, -1.04693272280631521908845e-18) : dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18));
  x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), o ? dd(-0.308425137534042437259529, -1.95698492133633550338345e-17) : dd(0.785398163397448278999491, 3.06287113727155002607105e-17));

  x = ddmul_d2_d2_d2(x, o ? s2 : dd(t, 0));
  x = o ? ddadd2_d2_d2_d(x, 1) : x;

  // Sign from bit 2 of q.
  if ((q & 4) != 0) { x.x = -x.x; x.y = -x.y; }

  return x;
}

// xsinpi_u05: collapses the double-double result of sinpik(d) to a double and
// applies the special cases, in this order (later ones win):
//   d == -0.0                -> -0.0
//   |d| > TRIGRANGEMAX3 / 4  -> 0
//   d is infinite            -> NaN
// NOTE(review): presumably sin(pi*d) with 0.5-ULP accuracy - confirm.
EXPORT CONST double xsinpi_u05(double d) {
  Sleef_double2 x = sinpik(d);
  double r = x.x + x.y;

  if (xisnegzero(d)) r = -0.0;
  if (fabsk(d) > TRIGRANGEMAX3/4) r = 0;
  if (xisinf(d)) r = SLEEF_NAN;

  return r;
}

// cospik: double-double kernel used by xcospi_u05. It mirrors sinpik with the
// selector inverted (o is set when bit 1 of q is clear) and with the sign
// taken from bit 2 of q + 2 instead of q.
static INLINE CONST Sleef_double2 cospik(double d) {
  double u, s, t;
  Sleef_double2 x, s2;

  u = d * 4;
  int q = ceilk(u) & ~1;
  int o = (q & 2) == 0;

  s = u - (double)q;
  t = s;
  s = s * s;
  s2 = ddmul_d2_d_d(t, t);

  // Horner chain; each coefficient is selected by o.
  u = o ? 9.94480387626843774090208e-16 : -2.02461120785182399295868e-14;
  u = mla(u, s, o ?
-3.89796226062932799164047e-13 : 6.94821830580179461327784e-12);
  u = mla(u, s, o ? 1.15011582539996035266901e-10 : -1.75724749952853179952664e-09);
  u = mla(u, s, o ? -2.4611369501044697495359e-08 : 3.13361688966868392878422e-07);
  u = mla(u, s, o ? 3.59086044859052754005062e-06 : -3.6576204182161551920361e-05);
  u = mla(u, s, o ? -0.000325991886927389905997954 : 0.00249039457019271850274356);
  // The two leading coefficients are double-double constants.
  x = ddadd2_d2_d_d2(u * s, o ? dd(0.0158543442438155018914259, -1.04693272280631521908845e-18) : dd(-0.0807455121882807852484731, 3.61852475067037104849987e-18));
  x = ddadd2_d2_d2_d2(ddmul_d2_d2_d2(s2, x), o ? dd(-0.308425137534042437259529, -1.95698492133633550338345e-17) : dd(0.785398163397448278999491, 3.06287113727155002607105e-17));

  x = ddmul_d2_d2_d2(x, o ? s2 : dd(t, 0));
  x = o ? ddadd2_d2_d2_d(x, 1) : x;

  // Sign from bit 2 of q + 2.
  if (((q+2) & 4) != 0) { x.x = -x.x; x.y = -x.y; }

  return x;
}

// xcospi_u05: collapses the double-double result of cospik(d) to a double and
// applies the special cases (the later one wins):
//   |d| > TRIGRANGEMAX3 / 4  -> 1
//   d is infinite            -> NaN
// NOTE(review): presumably cos(pi*d) with 0.5-ULP accuracy - confirm.
EXPORT CONST double xcospi_u05(double d) {
  Sleef_double2 x = cospik(d);
  double r = x.x + x.y;

  if (fabsk(d) > TRIGRANGEMAX3/4) r = 1;
  if (xisinf(d)) r = SLEEF_NAN;

  return r;
}

// xtan: tangent kernel evaluated in plain double.
//
// The reduction is chosen by magnitude:
//   |d| < TRIGRANGEMAX2 : ql = rint(d * 2/pi); two half-pi pieces are removed.
//   |d| < 1e+7          : the quotient is split into dqh (a multiple of 2^24)
//                         and ql; four half-pi pieces PI_A..PI_D are removed.
//   otherwise           : rempi(); NaN for infinite or NaN input.
// Note that the middle branch uses the literal 1e+7 rather than TRIGRANGEMAX.
// For odd ql the reduced argument is negated before the polynomial and the
// result is inverted afterwards.
EXPORT CONST double xtan(double d) {
  double u, s, x;
  int ql;

  if (fabsk(d) < TRIGRANGEMAX2) {
    ql = rintk(d * (2 * M_1_PI));
    x = mla(ql, -PI_A2*0.5, d);
    x = mla(ql, -PI_B2*0.5, x);
  } else if (fabsk(d) < 1e+7) {
    double dqh = trunck(d * ((2 * M_1_PI) / (1 << 24))) * (double)(1 << 24);
    ql = rintk(d * (2 * M_1_PI) - dqh);

    x = mla(dqh, -PI_A * 0.5, d);
    x = mla( ql, -PI_A * 0.5, x);
    x = mla(dqh, -PI_B * 0.5, x);
    x = mla( ql, -PI_B * 0.5, x);
    x = mla(dqh, -PI_C * 0.5, x);
    x = mla( ql, -PI_C * 0.5, x);
    x = mla(dqh + ql, -PI_D * 0.5, x);
  } else {
    ddi_t ddi = rempi(d);
    ql = ddi.i;
    x = ddi.dd.x + ddi.dd.y;
    if (xisinf(d) || xisnan(d)) x = SLEEF_NAN;
  }

  s = x * x;

  if ((ql & 1) != 0) x = -x;

  // Polynomial in s, highest-order coefficient first.
  u = 9.99583485362149960784268e-06;
  u = mla(u, s, -4.31184585467324750724175e-05);
  u = mla(u, s, 0.000103573238391744000389851);
  u = mla(u, s, -0.000137892809714281708733524);
  u = mla(u, s, 0.000157624358465342784274554);
  u = mla(u, s,
-6.07500301486087879295969e-05);
  u = mla(u, s, 0.000148898734751616411290179);
  u = mla(u, s, 0.000219040550724571513561967);
  u = mla(u, s, 0.000595799595197098359744547);
  u = mla(u, s, 0.00145461240472358871965441);
  u = mla(u, s, 0.0035923150771440177410343);
  u = mla(u, s, 0.00886321546662684547901456);
  u = mla(u, s, 0.0218694899718446938985394);
  u = mla(u, s, 0.0539682539049961967903002);
  u = mla(u, s, 0.133333333334818976423364);
  u = mla(u, s, 0.333333333333320047664472);

  u = mla(s, u * x, x);

  // Odd quadrant: the result is the reciprocal.
  if ((ql & 1) != 0) u = 1.0 / u;

  return u;
}

// xtan_u1: tangent kernel that carries the reduced argument as a
// double-double (Sleef_double2).
// NOTE(review): the _u1 suffix presumably marks the 1.0-ULP variant - confirm.
//
// The reduction is chosen by magnitude:
//   |d| < TRIGRANGEMAX2 : one quotient ql, two half-pi pieces (PI_A2, PI_B2).
//   |d| < TRIGRANGEMAX  : dqh is the coarse quotient (a multiple of 2^24); ql is
//                         obtained by truncating d * (M_2_PI_H, M_2_PI_L) - dqh
//                         after adding +/-0.5 (sign of d), all in double-double.
//   otherwise           : rempi(); NaN for infinite or NaN input.
// For odd ql the reduced argument is negated before the polynomial and the
// double-double result is inverted afterwards. A negative-zero input is
// returned unchanged.
EXPORT CONST double xtan_u1(double d) {
  double u;
  Sleef_double2 s, t, x;
  int ql;

  if (fabsk(d) < TRIGRANGEMAX2) {
    ql = rintk(d * (2 * M_1_PI));
    u = mla(ql, -PI_A2*0.5, d);
    s = ddadd_d2_d_d(u, ql * (-PI_B2*0.5));
  } else if (fabsk(d) < TRIGRANGEMAX) {
    const double dqh = trunck(d * (M_2_PI / (1 << 24))) * (double)(1 << 24);
    s = ddadd2_d2_d2_d(ddmul_d2_d2_d(dd(M_2_PI_H, M_2_PI_L), d), (d < 0 ? -0.5 : 0.5) - dqh);
    // The conversion to int truncates, which together with the +/-0.5 above
    // rounds the quotient.
    ql = s.x + s.y;

    u = mla(dqh, -PI_A*0.5, d);
    s = ddadd_d2_d_d (u, ql * (-PI_A*0.5));
    s = ddadd2_d2_d2_d(s, dqh * (-PI_B*0.5));
    s = ddadd2_d2_d2_d(s, ql * (-PI_B*0.5));
    s = ddadd2_d2_d2_d(s, dqh * (-PI_C*0.5));
    s = ddadd2_d2_d2_d(s, ql * (-PI_C*0.5));
    s = ddadd_d2_d2_d(s, (dqh + ql) * (-PI_D*0.5));
  } else {
    ddi_t ddi = rempi(d);
    ql = ddi.i;
    s = ddi.dd;
    if (xisinf(d) || xisnan(d)) s.x = SLEEF_NAN;
  }

  if ((ql & 1) != 0) s = ddneg_d2_d2(s);

  t = s;
  s = ddsqu_d2_d2(s);

  // Polynomial in the high word of the squared argument.
  u = 1.01419718511083373224408e-05;
  u = mla(u, s.x, -2.59519791585924697698614e-05);
  u = mla(u, s.x, 5.23388081915899855325186e-05);
  u = mla(u, s.x, -3.05033014433946488225616e-05);
  u = mla(u, s.x, 7.14707504084242744267497e-05);
  u = mla(u, s.x, 8.09674518280159187045078e-05);
  u = mla(u, s.x, 0.000244884931879331847054404);
  u = mla(u, s.x, 0.000588505168743587154904506);
  u = mla(u, s.x, 0.00145612788922812427978848);
  u = mla(u, s.x, 0.00359208743836906619142924);
  u = mla(u, s.x, 0.00886323944362401618113356);
  u = mla(u, s.x, 0.0218694882853846389592078);
  u = mla(u, s.x,
0.0539682539781298417636002);
  u = mla(u, s.x, 0.133333333333125941821962);

  // The last two terms are combined in double-double.
  x = ddadd_d2_d_d2(1, ddmul_d2_d2_d2(ddadd_d2_d_d(0.333333333333334980164153, u * s.x), s));
  x = ddmul_d2_d2_d2(t, x);

  // Odd quadrant: the result is the reciprocal.
  if ((ql & 1) != 0) x = ddrec_d2_d2(x);

  u = x.x + x.y;

  // A negative-zero input is returned unchanged.
  if (xisnegzero(d)) u = d;

  return u;
}

// xlog: natural-logarithm kernel in plain double.
//
// Inputs below DBL_MIN are first multiplied by 2^64 (2^32 * 2^32) and the
// exponent e is corrected by -64 afterwards. The argument is split into an
// exponent e and a mantissa m, and the result is built from a polynomial in
// x = (m - 1) / (m + 1) plus e * 0.693147... .
// Special cases, applied in this order (later ones win):
//   d is infinite  -> +inf
//   d < 0 or NaN   -> NaN
//   d == 0         -> -inf
EXPORT CONST double xlog(double d) {
  double x, x2, t, m;
  int e;

  int o = d < DBL_MIN;
  if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);

  // The 1/0.75 factor shifts the exponent break-point of the split.
  e = ilogb2k(d * (1.0/0.75));
  m = ldexp3k(d, -e);

  if (o) e -= 64;

  x = (m-1) / (m+1);
  x2 = x * x;

  t = 0.153487338491425068243146;
  t = mla(t, x2, 0.152519917006351951593857);
  t = mla(t, x2, 0.181863266251982985677316);
  t = mla(t, x2, 0.222221366518767365905163);
  t = mla(t, x2, 0.285714294746548025383248);
  t = mla(t, x2, 0.399999999950799600689777);
  t = mla(t, x2, 0.6666666666667778740063);
  t = mla(t, x2, 2);

  x = x * t + 0.693147180559945286226764 * e;

  if (xisinf(d)) x = SLEEF_INFINITY;
  if (d < 0 || xisnan(d)) x = SLEEF_NAN;
  if (d == 0) x = -SLEEF_INFINITY;

  return x;
}

// xexp: exponential kernel in plain double.
//
// q = rint(d * R_LN2) is the power of two; the remainder s is obtained by
// removing q * L2U and q * L2L from d (a two-part constant). The polynomial
// gives s*s*P(s) + s + 1, which is then scaled by 2^q with ldexp2k.
// Special cases: d > 709.78271114955742909217217426 -> +inf, d < -1000 -> 0.
EXPORT CONST double xexp(double d) {
  int q = (int)rintk(d * R_LN2);
  double s, u;

  s = mla(q, -L2U, d);
  s = mla(q, -L2L, s);

  u = 2.08860621107283687536341e-09;
  u = mla(u, s, 2.51112930892876518610661e-08);
  u = mla(u, s, 2.75573911234900471893338e-07);
  u = mla(u, s, 2.75572362911928827629423e-06);
  u = mla(u, s, 2.4801587159235472998791e-05);
  u = mla(u, s, 0.000198412698960509205564975);
  u = mla(u, s, 0.00138888888889774492207962);
  u = mla(u, s, 0.00833333333331652721664984);
  u = mla(u, s, 0.0416666666666665047591422);
  u = mla(u, s, 0.166666666666666851703837);
  u = mla(u, s, 0.5);

  u = s * s * u + s + 1;
  u = ldexp2k(u, q);

  if (d > 709.78271114955742909217217426) u = SLEEF_INFINITY;
  if (d < -1000) u = 0;

  return u;
}

// expm1k: internal helper built on the same reduction and coefficients as the
// plain-double exponential, but returning the value with 1 removed:
// s*s*P(s) + s when q == 0, otherwise 2^q * (that + 1) - 1.
// No special-case handling is done here.
static INLINE CONST double expm1k(double d) {
  int q = (int)rintk(d * R_LN2);
  double s, u;

  s = mla(q, -L2U, d);
  s = mla(q, -L2L, s);

  u = 2.08860621107283687536341e-09;
  u = mla(u, s, 2.51112930892876518610661e-08);
  u = mla(u, s, 2.75573911234900471893338e-07);
  u = mla(u, s,
2.75572362911928827629423e-06);
  u = mla(u, s, 2.4801587159235472998791e-05);
  u = mla(u, s, 0.000198412698960509205564975);
  u = mla(u, s, 0.00138888888889774492207962);
  u = mla(u, s, 0.00833333333331652721664984);
  u = mla(u, s, 0.0416666666666665047591422);
  u = mla(u, s, 0.166666666666666851703837);
  u = mla(u, s, 0.5);
  u = s * s * u + s;

  // Only rescale when the power of two is non-zero.
  if (q != 0) u = ldexp2k(u + 1, q) - 1;

  return u;
}

// logk: internal logarithm helper that returns its result as a double-double.
//
// Inputs below DBL_MIN are pre-scaled by 2^64 and the exponent is corrected
// by -64. With the argument split into exponent e and mantissa m, it forms
// x = (m - 1) / (m + 1) in double-double and returns
//   e * (0.693147..., 2.319e-17) + 2*x + x^3 * (x2 * t + c)
// where t is a polynomial in the high word of x^2 and c is a double-double
// constant. No special-case handling is done here.
static INLINE CONST Sleef_double2 logk(double d) {
  Sleef_double2 x, x2, s;
  double m, t;
  int e;

  int o = d < DBL_MIN;
  if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);

  e = ilogb2k(d * (1.0/0.75));
  m = ldexp3k(d, -e);

  if (o) e -= 64;

  x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
  x2 = ddsqu_d2_d2(x);

  t = 0.116255524079935043668677;
  t = mla(t, x2.x, 0.103239680901072952701192);
  t = mla(t, x2.x, 0.117754809412463995466069);
  t = mla(t, x2.x, 0.13332981086846273921509);
  t = mla(t, x2.x, 0.153846227114512262845736);
  t = mla(t, x2.x, 0.181818180850050775676507);
  t = mla(t, x2.x, 0.222222222230083560345903);
  t = mla(t, x2.x, 0.285714285714249172087875);
  t = mla(t, x2.x, 0.400000000000000077715612);
  Sleef_double2 c = dd(0.666666666666666629659233, 3.80554962542412056336616e-17);

  s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e);
  s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2));
  s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d2(ddmul_d2_d2_d2(x2, x),
                                       ddadd2_d2_d2_d2(ddmul_d2_d2_d(x2, t), c)));

  return s;
}

// xlog_u1: natural-logarithm kernel that accumulates its result in
// double-double before collapsing it to a double.
// NOTE(review): the _u1 suffix presumably marks the 1.0-ULP variant - confirm.
//
// Inputs below DBL_MIN are pre-scaled by 2^64 and the exponent is corrected
// by -64. The result is e * ln2 (two-part constant) + 2*x + x^3 * t, where
// x = (m - 1) / (m + 1) and t is a polynomial in x.x^2.
// Special cases, applied in this order (later ones win):
//   d is infinite  -> +inf
//   d < 0 or NaN   -> NaN
//   d == 0         -> -inf
EXPORT CONST double xlog_u1(double d) {
  Sleef_double2 x, s;
  double m, t, x2;
  int e;

  int o = d < DBL_MIN;
  if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);

  e = ilogb2k(d * (1.0/0.75));
  m = ldexp3k(d, -e);

  if (o) e -= 64;

  x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
  x2 = x.x * x.x;

  t = 0.1532076988502701353e+0;
  t = mla(t, x2, 0.1525629051003428716e+0);
  t = mla(t, x2, 0.1818605932937785996e+0);
  t = mla(t, x2, 0.2222214519839380009e+0);
  t = mla(t, x2, 0.2857142932794299317e+0);
  t = mla(t, x2, 0.3999999999635251990e+0);
  t = mla(t, x2,
0.6666666666667333541e+0);

  s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), (double)e);
  s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2));
  s = ddadd_d2_d2_d(s, x2 * x.x * t);

  double r = s.x + s.y;

  // Special cases; the later assignments take precedence.
  if (xisinf(d)) r = SLEEF_INFINITY;
  if (d < 0 || xisnan(d)) r = SLEEF_NAN;
  if (d == 0) r = -SLEEF_INFINITY;

  return r;
}

// expk: internal exponential helper taking a double-double argument and
// returning a double.
//
// q = rint((d.x + d.y) * R_LN2) is the power of two; q * L2U and q * L2L are
// removed from d in double-double and the remainder is normalized. The result
// is 1 + s + s^2 * P(s.x), scaled by 2^q with ldexpk. Arguments with
// d.x < -1000 return 0. Overflow is not handled here.
static INLINE CONST double expk(Sleef_double2 d) {
  int q = (int)rintk((d.x + d.y) * R_LN2);
  Sleef_double2 s, t;
  double u;

  s = ddadd2_d2_d2_d(d, q * -L2U);
  s = ddadd2_d2_d2_d(s, q * -L2L);

  s = ddnormalize_d2_d2(s);

  u = 2.51069683420950419527139e-08;
  u = mla(u, s.x, 2.76286166770270649116855e-07);
  u = mla(u, s.x, 2.75572496725023574143864e-06);
  u = mla(u, s.x, 2.48014973989819794114153e-05);
  u = mla(u, s.x, 0.000198412698809069797676111);
  u = mla(u, s.x, 0.0013888888939977128960529);
  u = mla(u, s.x, 0.00833333333332371417601081);
  u = mla(u, s.x, 0.0416666666665409524128449);
  u = mla(u, s.x, 0.166666666666666740681535);
  u = mla(u, s.x, 0.500000000000000999200722);

  t = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddsqu_d2_d2(s), u));

  t = ddadd_d2_d_d2(1, t);

  u = ldexpk(t.x + t.y, q);

  if (d.x < -1000) u = 0;

  return u;
}

// xpow: power function computed as expk(y * logk(|x|)) followed by a chain of
// special-case overrides. The overrides are applied in order, so a later
// assignment replaces an earlier one:
//   1. y * log|x| above 709.78... or a NaN intermediate  -> +inf
//   2. sign factor: 1 for x > 0; otherwise NaN when y is not an integer,
//      -1 when y is an odd integer, 1 when y is an even integer
//   3. y infinite: 0, 1 or +inf depending on the sign of (|x| - 1) taken with
//      the sign of y
//   4. x infinite or zero: 0 or +inf, signed like x when y is an odd integer
//   5. x or y NaN -> NaN
//   6. y == 0 or x == 1 -> 1 (this also overrides the NaN case above)
EXPORT CONST double xpow(double x, double y) {
  int yisint = xisint(y);
  int yisodd = yisint && xisodd(y);

  Sleef_double2 d = ddmul_d2_d2_d(logk(fabsk(x)), y);
  double result = expk(d);
  if (d.x > 709.78271114955742909217217426) result = SLEEF_INFINITY;

  result = xisnan(result) ? SLEEF_INFINITY : result;
  result *= (x > 0 ? 1 : (!yisint ? SLEEF_NAN : (yisodd ? -1 : 1)));

  double efx = mulsign(fabsk(x) - 1, y);
  if (xisinf(y)) result = efx < 0 ? 0.0 : (efx == 0 ? 1.0 : SLEEF_INFINITY);
  if (xisinf(x) || x == 0) result = (yisodd ? sign(x) : 1) * ((x == 0 ? -y : y) < 0 ?
0 : SLEEF_INFINITY);
  if (xisnan(x) || xisnan(y)) result = SLEEF_NAN;
  // Applied last, so it wins over the NaN case above.
  if (y == 0 || x == 1) result = 1;

  return result;
}

// expk2: internal exponential helper with double-double input and output.
//
// q = rint((d.x + d.y) * R_LN2) is the power of two; q * L2U and q * L2L are
// removed from d in double-double. The low-order part of the series is a
// polynomial in s.x; the 1/6, 1/2, s and 1 terms are added in double-double.
// Both words are scaled by 2^q. Arguments with d.x < -1000 return (0, 0).
static INLINE CONST Sleef_double2 expk2(Sleef_double2 d) {
  int q = (int)rintk((d.x + d.y) * R_LN2);
  Sleef_double2 s, t;
  double u;

  s = ddadd2_d2_d2_d(d, q * -L2U);
  s = ddadd2_d2_d2_d(s, q * -L2L);

  u = +0.1602472219709932072e-9;
  u = mla(u, s.x, +0.2092255183563157007e-8);
  u = mla(u, s.x, +0.2505230023782644465e-7);
  u = mla(u, s.x, +0.2755724800902135303e-6);
  u = mla(u, s.x, +0.2755731892386044373e-5);
  u = mla(u, s.x, +0.2480158735605815065e-4);
  u = mla(u, s.x, +0.1984126984148071858e-3);
  u = mla(u, s.x, +0.1388888888886763255e-2);
  u = mla(u, s.x, +0.8333333333333347095e-2);
  u = mla(u, s.x, +0.4166666666666669905e-1);

  t = ddadd2_d2_d2_d(ddmul_d2_d2_d(s, u), +0.1666666666666666574e+0);
  t = ddadd2_d2_d2_d(ddmul_d2_d2_d2(s, t), 0.5);
  t = ddadd2_d2_d2_d2(s, ddmul_d2_d2_d2(ddsqu_d2_d2(s), t));

  t = ddadd2_d2_d_d2(1, t);

  t.x = ldexp2k(t.x, q);
  t.y = ldexp2k(t.y, q);

  return d.x < -1000 ? dd(0, 0) : t;
}

// xsinh: hyperbolic sine computed as (e - 1/e) / 2 with e = expk2(|x|) in
// double-double; the sign of x is re-applied at the end.
// |x| > 710 or a NaN intermediate gives infinity (signed like x); NaN input
// gives NaN.
EXPORT CONST double xsinh(double x) {
  double y = fabsk(x);
  Sleef_double2 d = expk2(dd(y, 0));
  d = ddsub_d2_d2_d2(d, ddrec_d2_d2(d));
  y = (d.x + d.y) * 0.5;

  y = fabsk(x) > 710 ? SLEEF_INFINITY : y;
  y = xisnan(y) ? SLEEF_INFINITY : y;
  y = mulsign(y, x);
  y = xisnan(x) ? SLEEF_NAN : y;

  return y;
}

// xcosh: hyperbolic cosine computed as (e + 1/e) / 2 with e = expk2(|x|) in
// double-double.
// |x| > 710 or a NaN intermediate gives +inf; NaN input gives NaN.
EXPORT CONST double xcosh(double x) {
  double y = fabsk(x);
  Sleef_double2 d = expk2(dd(y, 0));
  d = ddadd_d2_d2_d2(d, ddrec_d2_d2(d));
  y = (d.x + d.y) * 0.5;

  y = fabsk(x) > 710 ? SLEEF_INFINITY : y;
  y = xisnan(y) ? SLEEF_INFINITY : y;
  y = xisnan(x) ? SLEEF_NAN : y;

  return y;
}

// xtanh: hyperbolic tangent computed as (e - 1/e) / (e + 1/e) with
// e = expk2(|x|) in double-double; the sign of x is re-applied at the end.
// |x| > 18.714973875 or a NaN intermediate saturates to 1 (signed like x);
// NaN input gives NaN.
EXPORT CONST double xtanh(double x) {
  double y = fabsk(x);
  Sleef_double2 d = expk2(dd(y, 0));
  Sleef_double2 e = ddrec_d2_d2(d);
  d = dddiv_d2_d2_d2(ddsub_d2_d2_d2(d, e), ddadd_d2_d2_d2(d, e));
  y = d.x + d.y;

  y = fabsk(x) > 18.714973875 ? 1.0 : y;
  y = xisnan(y) ? 1.0 : y;
  y = mulsign(y, x);
  y = xisnan(x) ?
SLEEF_NAN : y;

  return y;
}

// xsinh_u35: hyperbolic sine in plain double, built on e = expm1k(|x|) as
// (e + 2) / (e + 1) * (e / 2); the sign of x is re-applied at the end.
// |x| > 709 or a NaN intermediate gives infinity (signed like x); NaN input
// gives NaN.
// NOTE(review): the _u35 suffix presumably marks the 3.5-ULP variant - confirm.
EXPORT CONST double xsinh_u35(double x) {
  double e = expm1k(fabsk(x));
  double y = (e + 2) / (e + 1) * (0.5 * e);

  y = fabsk(x) > 709 ? SLEEF_INFINITY : y;
  y = xisnan(y) ? SLEEF_INFINITY : y;
  y = mulsign(y, x);
  y = xisnan(x) ? SLEEF_NAN : y;

  return y;
}

// xcosh_u35: hyperbolic cosine in plain double, computed as 0.5/e + 0.5*e with
// e = xexp(|x|).
// |x| > 709 or a NaN intermediate gives +inf; NaN input gives NaN.
EXPORT CONST double xcosh_u35(double x) {
  double e = xexp(fabsk(x));
  double y = 0.5 / e + 0.5 * e;

  y = fabsk(x) > 709 ? SLEEF_INFINITY : y;
  y = xisnan(y) ? SLEEF_INFINITY : y;
  y = xisnan(x) ? SLEEF_NAN : y;

  return y;
}

// xtanh_u35: hyperbolic tangent in plain double, computed as d / (d + 2) with
// d = expm1k(2*|x|); the sign of x is re-applied at the end.
// |x| > 18.714973875 or a NaN intermediate saturates to 1 (signed like x);
// NaN input gives NaN.
EXPORT CONST double xtanh_u35(double x) {
  double y = fabsk(x);
  double d = expm1k(2*y);
  y = d / (d + 2);

  y = fabsk(x) > 18.714973875 ? 1.0 : y;
  y = xisnan(y) ? 1.0 : y;
  y = mulsign(y, x);
  y = xisnan(x) ? SLEEF_NAN : y;

  return y;
}

// logk2: internal logarithm helper with double-double input and output.
//
// The exponent e is taken from d.x and both words are scaled by 2^-e to give
// the mantissa m. With x = (m - 1) / (m + 1) in double-double it returns
//   e * (0.693147..., 2.319e-17) + 2*x + x^3 * t
// where t is a polynomial in the high word of x^2. Unlike the double-input
// helpers there is no pre-scaling for inputs below DBL_MIN and no special-case
// handling.
static INLINE CONST Sleef_double2 logk2(Sleef_double2 d) {
  Sleef_double2 x, x2, m, s;
  double t;
  int e;

  e = ilogbk(d.x * (1.0/0.75));

  m.x = ldexp2k(d.x, -e);
  m.y = ldexp2k(d.y, -e);

  x = dddiv_d2_d2_d2(ddadd2_d2_d2_d(m, -1), ddadd2_d2_d2_d(m, 1));
  x2 = ddsqu_d2_d2(x);

  t = 0.13860436390467167910856;
  t = mla(t, x2.x, 0.131699838841615374240845);
  t = mla(t, x2.x, 0.153914168346271945653214);
  t = mla(t, x2.x, 0.181816523941564611721589);
  t = mla(t, x2.x, 0.22222224632662035403996);
  t = mla(t, x2.x, 0.285714285511134091777308);
  t = mla(t, x2.x, 0.400000000000914013309483);
  t = mla(t, x2.x, 0.666666666666664853302393);

  s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), e);
  s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2));
  s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d(ddmul_d2_d2_d2(x2, x), t));

  return s;
}

// xasinh: inverse hyperbolic sine, computed as logk2(sqrt(v^2 + 1) * k + x)
// in double-double. For |x| > 1 the square root is taken of (1/x)^2 + 1 and
// multiplied back by |x| (k = |x|); otherwise v = |x| and k = 1.
// |x| > SQRT_DBL_MAX or a NaN intermediate gives infinity signed like x;
// NaN input gives NaN; a negative-zero input gives -0.0.
EXPORT CONST double xasinh(double x) {
  double y = fabsk(x);
  Sleef_double2 d;

  d = y > 1 ? ddrec_d2_d(x) : dd(y, 0);
  d = ddsqrt_d2_d2(ddadd2_d2_d2_d(ddsqu_d2_d2(d), 1));
  d = y > 1 ? ddmul_d2_d2_d(d, y) : d;

  d = logk2(ddnormalize_d2_d2(ddadd_d2_d2_d(d, x)));
  y = d.x + d.y;

  y = (fabsk(x) > SQRT_DBL_MAX || xisnan(y)) ? mulsign(SLEEF_INFINITY, x) : y;
  y = xisnan(x) ? SLEEF_NAN : y;
  y = xisnegzero(x) ?
-0.0 : y;

  return y;
}

// xacosh: inverse hyperbolic cosine, computed in double-double as
// logk2(sqrt(x + 1) * sqrt(x - 1) + x).
// Overrides, applied in order (later ones win):
//   x > SQRT_DBL_MAX or NaN intermediate -> +inf
//   x == 1.0                             -> 0.0
//   x < 1.0                              -> NaN
//   x is NaN                             -> NaN
EXPORT CONST double xacosh(double x) {
  Sleef_double2 d = logk2(ddadd2_d2_d2_d(ddmul_d2_d2_d2(ddsqrt_d2_d2(ddadd2_d2_d_d(x, 1)), ddsqrt_d2_d2(ddadd2_d2_d_d(x, -1))), x));
  double y = d.x + d.y;

  y = (x > SQRT_DBL_MAX || xisnan(y)) ? SLEEF_INFINITY : y;
  y = x == 1.0 ? 0.0 : y;
  y = x < 1.0 ? SLEEF_NAN : y;
  y = xisnan(x) ? SLEEF_NAN : y;

  return y;
}

// xatanh: inverse hyperbolic tangent, computed on |x| in double-double as
// 0.5 * logk2((1 + |x|) / (1 - |x|)); the sign of x is re-applied afterwards.
// |x| > 1 gives NaN, |x| == 1 gives infinity (signed like x); infinite input
// or a NaN intermediate gives NaN.
EXPORT CONST double xatanh(double x) {
  double y = fabsk(x);
  Sleef_double2 d = logk2(dddiv_d2_d2_d2(ddadd2_d2_d_d(1, y), ddadd2_d2_d_d(1, -y)));
  y = y > 1.0 ? SLEEF_NAN : (y == 1.0 ? SLEEF_INFINITY : (d.x + d.y) * 0.5);

  y = mulsign(y, x);
  y = (xisinf(x) || xisnan(y)) ? SLEEF_NAN : y;

  return y;
}

//

// xcbrt: cube-root kernel in plain double.
//
// The exponent e of |d| is removed so that the polynomial works on a scaled
// value, and (e + 6144) is split into a remainder r modulo 3 and a quotient.
// r selects a correction factor (1, 1.2599..., or 1.5874...), which is scaled
// by 2^((e + 6144) / 3 - 2048) and given the sign of d. The polynomial result
// x is refined by two correction steps before being multiplied by q.
// NOTE(review): the polynomial presumably approximates d^(-1/3) and the
// refinement steps are presumably Newton-style - confirm.
EXPORT CONST double xcbrt(double d) { // max error : 2 ulps
  double x, y, q = 1.0;
  int e, r;

  e = ilogbk(fabsk(d))+1;
  d = ldexp2k(d, -e);
  // The +6144 offset (a multiple of 3) keeps the operands of % and / positive.
  r = (e + 6144) % 3;
  q = (r == 1) ? 1.2599210498948731647672106 : q;
  q = (r == 2) ? 1.5874010519681994747517056 : q;
  q = ldexp2k(q, (e + 6144) / 3 - 2048);

  q = mulsign(q, d);
  d = fabsk(d);

  x = -0.640245898480692909870982;
  x = mla(x, d, 2.96155103020039511818595);
  x = mla(x, d, -5.73353060922947843636166);
  x = mla(x, d, 6.03990368989458747961407);
  x = mla(x, d, -3.85841935510444988821632);
  x = mla(x, d, 2.2307275302496609725722);

  // First refinement of x.
  y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0);
  // Second refinement, combined with the final scaling by q.
  y = d * x * x;
  y = (y - (2.0 / 3.0) * y * (y * x - 1)) * q;

  return y;
}

// xcbrt_u1: cube-root kernel that performs the final refinement and the
// multiplication by the correction factor in double-double.
// NOTE(review): the _u1 suffix presumably marks the 1.0-ULP variant - confirm.
//
// The exponent handling and the initial polynomial match the plain-double
// kernel; the correction factor q2 is a double-double constant selected by
// r = (e + 6144) % 3, and the power-of-two scaling is applied at the very end.
EXPORT CONST double xcbrt_u1(double d) {
  double x, y, z;
  Sleef_double2 q2 = dd(1, 0), u, v;
  int e, r;

  e = ilogbk(fabsk(d))+1;
  d = ldexp2k(d, -e);
  r = (e + 6144) % 3;
  q2 = (r == 1) ? dd(1.2599210498948731907, -2.5899333753005069177e-17) : q2;
  q2 = (r == 2) ?
dd(1.5874010519681995834, -1.0869008194197822986e-16) : q2;

  // Transfer the sign of d onto both words of the correction factor.
  q2.x = mulsign(q2.x, d); q2.y = mulsign(q2.y, d);
  d = fabsk(d);

  x = -0.640245898480692909870982;
  x = mla(x, d, 2.96155103020039511818595);
  x = mla(x, d, -5.73353060922947843636166);
  x = mla(x, d, 6.03990368989458747961407);
  x = mla(x, d, -3.85841935510444988821632);
  x = mla(x, d, 2.2307275302496609725722);

  // First refinement of x, in plain double.
  y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0);

  z = x;

  // Residual d * x^4 - x, computed in double-double.
  u = ddmul_d2_d_d(x, x);
  u = ddmul_d2_d2_d2(u, u);
  u = ddmul_d2_d2_d(u, d);
  u = ddadd2_d2_d2_d(u, -x);
  y = u.x + u.y;

  // Correction term, then (z^2 + y) * d * q2 in double-double.
  y = -2.0 / 3.0 * y * z;
  v = ddadd2_d2_d2_d(ddmul_d2_d_d(z, z), y);
  v = ddmul_d2_d2_d(v, d);
  v = ddmul_d2_d2_d2(v, q2);
  z = ldexp2k(v.x + v.y, (e + 6144) / 3 - 2048);

  // Special cases: infinity and zero keep the sign carried by q2.x.
  if (xisinf(d)) { z = mulsign(SLEEF_INFINITY, q2.x); }
  if (d == 0) { z = mulsign(0, q2.x); }

  return z;
}

// xexp2: base-2 exponential kernel.
//
// q = rint(d) is the power of two and s = d - q the remainder. A polynomial
// in s is evaluated in plain double; the final 1 + u * s is formed in
// double-double and its high word is scaled by 2^q.
// Special cases: d >= 1024 -> +inf, d < -2000 -> 0.
EXPORT CONST double xexp2(double d) {
  int q = (int)rintk(d);
  double s, u;

  s = d - q;

  u = +0.4434359082926529454e-9;
  u = mla(u, s, +0.7073164598085707425e-8);
  u = mla(u, s, +0.1017819260921760451e-6);
  u = mla(u, s, +0.1321543872511327615e-5);
  u = mla(u, s, +0.1525273353517584730e-4);
  u = mla(u, s, +0.1540353045101147808e-3);
  u = mla(u, s, +0.1333355814670499073e-2);
  u = mla(u, s, +0.9618129107597600536e-2);
  u = mla(u, s, +0.5550410866482046596e-1);
  u = mla(u, s, +0.2402265069591012214e+0);
  u = mla(u, s, +0.6931471805599452862e+0);
  u = ddnormalize_d2_d2(ddadd_d2_d_d2(1, ddmul_d2_d_d(u, s))).x;

  u = ldexp2k(u, q);

  if (d >= 1024) u = SLEEF_INFINITY;
  if (d < -2000) u = 0;

  return u;
}

// xexp10: base-10 exponential kernel.
//
// q = rint(d * LOG10_2) is the power of two; the remainder s is obtained by
// removing q * L10U and q * L10L from d (a two-part constant). A polynomial
// in s is evaluated in plain double; the final 1 + u * s is formed in
// double-double and its high word is scaled by 2^q.
// Special cases: d > 308.25471555991671 -> +inf, d < -350 -> 0.
EXPORT CONST double xexp10(double d) {
  int q = (int)rintk(d * LOG10_2);
  double s, u;

  s = mla(q, -L10U, d);
  s = mla(q, -L10L, s);

  u = +0.2411463498334267652e-3;
  u = mla(u, s, +0.1157488415217187375e-2);
  u = mla(u, s, +0.5013975546789733659e-2);
  u = mla(u, s, +0.1959762320720533080e-1);
  u = mla(u, s, +0.6808936399446784138e-1);
  u = mla(u, s, +0.2069958494722676234e+0);
  u = mla(u, s, +0.5393829292058536229e+0);
  u = mla(u, s, +0.1171255148908541655e+1);
  u = mla(u, s, +0.2034678592293432953e+1);
  u = mla(u, s, +0.2650949055239205876e+1);
  u = mla(u, s, +0.2302585092994045901e+1);
  u = ddnormalize_d2_d2(ddadd_d2_d_d2(1, ddmul_d2_d_d(u, s))).x;

  u = ldexp2k(u, q);

  if (d > 308.25471555991671) u = SLEEF_INFINITY; // log10(DBL_MAX)
  if (d < -350) u = 0;

  return u;
}

// xexpm1: exp(a) - 1 computed in double-double as expk2(a) + (-1.0) and then
// collapsed to a double.
// Special cases, applied in order (later ones win): overflow above log(DBL_MAX)
// gives +inf, very negative arguments give -1, and a negative-zero input
// gives -0.0.
EXPORT CONST double xexpm1(double a) {
  Sleef_double2 d = ddadd2_d2_d2_d(expk2(dd(a, 0)), -1.0);
  double x = d.x + d.y;
  if (a > 709.782712893383996732223) x = SLEEF_INFINITY; // log(DBL_MAX)
  if (a < -36.736800569677101399113302437) x = -1; // log(1 - nexttoward(1, 0))
  if (xisnegzero(a)) x = -0.0;
  return x;
}

// xlog10: base-10 logarithm kernel accumulating in double-double.
//
// Inputs below DBL_MIN are pre-scaled by 2^64 and the exponent is corrected
// by -64. With the argument split into exponent e and mantissa m and
// x = (m - 1) / (m + 1), the result is
//   e * (0.30102999..., -2.80e-18) + x * (0.86858896..., 1.14e-17) + x^3 * t
// where t is a polynomial in x.x^2.
// Special cases, applied in this order (later ones win):
//   d is infinite  -> +inf
//   d < 0 or NaN   -> NaN
//   d == 0         -> -inf
EXPORT CONST double xlog10(double d) {
  Sleef_double2 x, s;
  double m, t, x2;
  int e;

  int o = d < DBL_MIN;
  if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);

  e = ilogb2k(d * (1.0/0.75));
  m = ldexp3k(d, -e);

  if (o) e -= 64;

  x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
  x2 = x.x * x.x;

  t = +0.6653725819576758460e-1;
  t = mla(t, x2, +0.6625722782820833712e-1);
  t = mla(t, x2, +0.7898105214313944078e-1);
  t = mla(t, x2, +0.9650955035715275132e-1);
  t = mla(t, x2, +0.1240841409721444993e+0);
  t = mla(t, x2, +0.1737177927454605086e+0);
  t = mla(t, x2, +0.2895296546021972617e+0);

  s = ddmul_d2_d2_d(dd(0.30102999566398119802, -2.803728127785170339e-18), (double)e);
  s = ddadd_d2_d2_d2(s, ddmul_d2_d2_d2(x, dd(0.86858896380650363334, 1.1430059694096389311e-17)));
  s = ddadd_d2_d2_d(s, x2 * x.x * t);

  double r = s.x + s.y;

  if (xisinf(d)) r = SLEEF_INFINITY;
  if (d < 0 || xisnan(d)) r = SLEEF_NAN;
  if (d == 0) r = -SLEEF_INFINITY;

  return r;
}

// xlog2: base-2 logarithm kernel accumulating in double-double.
//
// Inputs below DBL_MIN are pre-scaled by 2^64 and the exponent is corrected
// by -64. With the argument split into exponent e and mantissa m and
// x = (m - 1) / (m + 1), the result is
//   e + x * (2.88539008..., 6.06e-18) + x^3 * t
// where t is a polynomial in x.x^2.
// Special cases, applied in this order (later ones win):
//   d is infinite  -> +inf
//   d < 0 or NaN   -> NaN
//   d == 0         -> -inf
EXPORT CONST double xlog2(double d) {
  Sleef_double2 x, s;
  double m, t, x2;
  int e;

  int o = d < DBL_MIN;
  if (o) d *= (double)(1LL << 32) * (double)(1LL << 32);

  e = ilogb2k(d * (1.0/0.75));
  m = ldexp3k(d, -e);

  if (o) e -= 64;

  x = dddiv_d2_d2_d2(ddadd2_d2_d_d(-1, m), ddadd2_d2_d_d(1, m));
  x2 = x.x * x.x;

  t = +0.2211941750456081490e+0;
  t = mla(t, x2, +0.2200768693152277689e+0);
  t = mla(t, x2, +0.2623708057488514656e+0);
  t = mla(t, x2, +0.3205977477944495502e+0);
  t = mla(t,
x2, +0.4121985945485324709e+0);
  t = mla(t, x2, +0.5770780162997058982e+0);
  t = mla(t, x2, +0.96179669392608091449 );

  s = ddadd2_d2_d_d2(e, ddmul_d2_d2_d2(x, dd(2.885390081777926774, 6.0561604995516736434e-18)));
  s = ddadd2_d2_d2_d(s, x2 * x.x * t);

  double r = s.x + s.y;

  // Special cases; the later assignments take precedence.
  if (xisinf(d)) r = SLEEF_INFINITY;
  if (d < 0 || xisnan(d)) r = SLEEF_NAN;
  if (d == 0) r = -SLEEF_INFINITY;

  return r;
}

// xlog1p: log(1 + d) kernel accumulating in double-double.
//
// The exponent e is taken from dp1 = d + 1 (pre-scaled by 2^64 when it is
// below DBL_MIN), but the mantissa offset m is rebuilt from d itself as
// d * t + (t - 1) with t = 2^-e, instead of from the already rounded sum d + 1.
// The result is e * ln2 (two-part constant) + 2*x + x^3 * t with
// x = m / (2 + m).
// Special cases, applied in this order (later ones win):
//   d > 1e+307      -> +inf
//   d < -1 or NaN   -> NaN
//   d == -1         -> -inf
//   d == -0.0       -> -0.0
EXPORT CONST double xlog1p(double d) {
  Sleef_double2 x, s;
  double m, t, x2;
  int e;

  double dp1 = d + 1;

  int o = dp1 < DBL_MIN;
  if (o) dp1 *= (double)(1LL << 32) * (double)(1LL << 32);

  e = ilogb2k(dp1 * (1.0/0.75));

  t = ldexp3k(1, -e);
  m = mla(d, t, t - 1);

  if (o) e -= 64;

  x = dddiv_d2_d2_d2(dd(m, 0), ddadd_d2_d_d(2, m));
  x2 = x.x * x.x;

  t = 0.1532076988502701353e+0;
  t = mla(t, x2, 0.1525629051003428716e+0);
  t = mla(t, x2, 0.1818605932937785996e+0);
  t = mla(t, x2, 0.2222214519839380009e+0);
  t = mla(t, x2, 0.2857142932794299317e+0);
  t = mla(t, x2, 0.3999999999635251990e+0);
  t = mla(t, x2, 0.6666666666667333541e+0);

  s = ddmul_d2_d2_d(dd(0.693147180559945286226764, 2.319046813846299558417771e-17), (double)e);
  s = ddadd_d2_d2_d2(s, ddscale_d2_d2_d(x, 2));
  s = ddadd_d2_d2_d(s, x2 * x.x * t);

  double r = s.x + s.y;

  if (d > 1e+307) r = SLEEF_INFINITY;
  if (d < -1 || xisnan(d)) r = SLEEF_NAN;
  if (d == -1) r = -SLEEF_INFINITY;
  if (xisnegzero(d)) r = -0.0;

  return r;
}

//

// xfma: software multiply-add x * y + z evaluated in double-double.
//
// h2 is a plain-double estimate of the result, used only to choose a scaling
// and to detect non-finite outcomes:
//   |h2| < 1e-300 : x and y are scaled up by c1 = 2^108, z by c2 = 2^216, and
//                   the result is scaled back by q = 1 / c2.
//   |h2| > 1e+299 : the inverse scaling is applied.
// When x or y is zero the result is z. If z is infinite while x and y are
// finite, the result is z. If h2 is infinite or NaN it is returned as is.
EXPORT CONST double xfma(double x, double y, double z) {
  double h2 = x * y + z, q = 1;
  if (fabsk(h2) < 1e-300) {
    const double c0 = 1ULL << 54, c1 = c0 * c0, c2 = c1 * c1;
    x *= c1;
    y *= c1;
    z *= c2;
    q = 1.0 / c2;
  }
  if (fabsk(h2) > 1e+299) {
    const double c0 = 1ULL << 54, c1 = c0 * c0, c2 = c1 * c1;
    x *= 1.0 / c1;
    y *= 1.0 / c1;
    z *= 1. / c2;
    q = c2;
  }
  Sleef_double2 d = ddmul_d2_d_d(x, y);
  d = ddadd2_d2_d2_d(d, z);
  double ret = (x == 0 || y == 0) ? z : (d.x + d.y);
  if ((xisinf(z) && !xisinf(x) && !xisnan(x) && !xisinf(y) && !xisnan(y))) h2 = z;
  return (xisinf(h2) || xisnan(h2)) ?
h2 : ret*q;
}

// xsqrt_u05: square root computed without the hardware sqrt.
//
// Negative input becomes NaN. Very small and very large inputs are rescaled
// (with q holding the matching correction, already multiplied by the 0.5 used
// in the final step). An initial reciprocal-square-root estimate is obtained
// by integer manipulation of the bit pattern and refined by three iterations
// of x = x * (1.5 - 0.5 * d * x * x); the last one is multiplied by d. A final
// double-double step (d + x*x) * (1/x) * q gives the result.
// +inf and 0 are returned unchanged.
EXPORT CONST double xsqrt_u05(double d) {
  double q = 0.5;

  d = d < 0 ? SLEEF_NAN : d;

  if (d < 8.636168555094445E-78) {
    d *= 1.157920892373162E77;
    q = 2.9387358770557188E-39 * 0.5;
  }

  if (d > 1.3407807929942597e+154) {
    d *= 7.4583407312002070e-155;
    q = 1.1579208923731620e+77 * 0.5;
  }

  // http://en.wikipedia.org/wiki/Fast_inverse_square_root
  double x = longBitsToDouble(0x5fe6ec85e7de30da - (doubleToRawLongBits(d + 1e-320) >> 1));

  x = x * (1.5 - 0.5 * d * x * x);
  x = x * (1.5 - 0.5 * d * x * x);
  x = x * (1.5 - 0.5 * d * x * x) * d;

  Sleef_double2 d2 = ddmul_d2_d2_d2(ddadd2_d2_d_d2(d, ddmul_d2_d_d(x, x)), ddrec_d2_d(x));

  double ret = (d2.x + d2.y) * q;

  ret = d == SLEEF_INFINITY ? SLEEF_INFINITY : ret;
  ret = d == 0 ? d : ret;

  return ret;
}

// xsqrt_u35: forwards to xsqrt_u05.
EXPORT CONST double xsqrt_u35(double d) { return xsqrt_u05(d); }
// xsqrt: forwards to the SQRT macro/function defined elsewhere in the project.
EXPORT CONST double xsqrt(double d) { return SQRT(d); }

// xfabs: absolute value, via fabsk.
EXPORT CONST double xfabs(double x) { return fabsk(x); }

// xcopysign: forwards to copysignk(x, y).
EXPORT CONST double xcopysign(double x, double y) { return copysignk(x, y); }

// xfmax: larger of x and y. If y is NaN, x is returned; if only x is NaN the
// comparison is false and y is returned.
EXPORT CONST double xfmax(double x, double y) {
  return y != y ? x : (x > y ? x : y);
}

// xfmin: smaller of x and y, with the same NaN handling as the maximum above.
EXPORT CONST double xfmin(double x, double y) {
  return y != y ? x : (x < y ? x : y);
}

// xfdim: positive difference. Returns x - y, replaced by 0 when the
// difference is negative or when x == y (which covers inf - inf).
EXPORT CONST double xfdim(double x, double y) {
  double ret = x - y;
  if (ret < 0 || x == y) ret = 0;
  return ret;
}

// xtrunc: rounds toward zero.
//
// The fractional part fr is extracted in two steps so that every int32_t
// conversion stays in range: first multiples of 2^31 are removed, then the
// integer part of what is left. Infinite inputs and inputs with
// |x| >= 2^52 are returned unchanged; otherwise x - fr is returned with the
// sign of x.
EXPORT CONST double xtrunc(double x) {
  double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
  fr = fr - (int32_t)fr;
  return (xisinf(x) || fabsk(x) >= (double)(1LL << 52)) ? x : copysignk(x - fr, x);
}

// xfloor: rounds toward negative infinity. Same fractional-part extraction as
// truncation, after which a negative fraction is moved into [0, 1) so that
// x - fr rounds down.
EXPORT CONST double xfloor(double x) {
  double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
  fr = fr - (int32_t)fr;
  fr = fr < 0 ? fr+1.0 : fr;
  return (xisinf(x) || fabsk(x) >= (double)(1LL << 52)) ? x : copysignk(x - fr, x);
}

// xceil: rounds toward positive infinity. Same fractional-part extraction as
// truncation, after which a positive fraction is moved into (-1, 0] so that
// x - fr rounds up.
EXPORT CONST double xceil(double x) {
  double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
  fr = fr - (int32_t)fr;
  fr = fr <= 0 ? fr : fr-1.0;
  return (xisinf(x) || fabsk(x) >= (double)(1LL << 52)) ?
x : copysignk(x - fr, x);
}

// xround: rounds to the nearest integer by taking the floor of d + 0.5, with
// an adjustment (x--) when d + 0.5 is a non-positive integer so that such ties
// move away from zero.
// The largest double below 0.5 is handled explicitly because adding 0.5 to it
// rounds up to 1.0. Infinite inputs and |d| >= 2^52 are returned unchanged;
// the result carries the sign of d.
EXPORT CONST double xround(double d) {
  double x = d + 0.5;
  double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
  fr = fr - (int32_t)fr;
  if (fr == 0 && x <= 0) x--;
  fr = fr < 0 ? fr+1.0 : fr;
  x = d == 0.49999999999999994449 ? 0 : x; // nextafter(0.5, 0)
  return (xisinf(d) || fabsk(d) >= (double)(1LL << 52)) ? d : copysignk(x - fr, d);
}

// xrint: rounds to the nearest integer by taking the floor of d + 0.5, with
// an extra adjustment when d + 0.5 is an odd integer (fr == 0 && isodd).
// NOTE(review): that adjustment presumably implements ties-to-even - confirm.
// The smallest double above 0.5 is handled explicitly. Infinite inputs and
// |d| >= 2^52 are returned unchanged; the result carries the sign of d.
EXPORT CONST double xrint(double d) {
  double x = d + 0.5;
  double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31)));
  // Parity of the integer part, taken before the fraction is isolated.
  int32_t isodd = (1 & (int32_t)fr) != 0;
  fr = fr - (int32_t)fr;
  fr = (fr < 0 || (fr == 0 && isodd)) ? fr+1.0 : fr;
  x = d == 0.50000000000000011102 ? 0 : x; // nextafter(0.5, 1)
  return (xisinf(d) || fabsk(d) >= (double)(1LL << 52)) ? d : copysignk(x - fr, d);
}

// xhypot_u05: sqrt(x^2 + y^2) computed in double-double as
// max * sqrt((min / max)^2 + 1), where min and max are the smaller and larger
// of |x| and |y|. When max is below DBL_MIN both operands of the division are
// scaled by 2^54 first.
// Overrides, applied in order (later ones win):
//   NaN intermediate      -> +inf
//   min == 0              -> max
//   x or y is NaN         -> NaN
//   x or y is infinite    -> +inf (also when the other argument is NaN)
EXPORT CONST double xhypot_u05(double x, double y) {
  x = fabsk(x);
  y = fabsk(y);
  double min = fmink(x, y), n = min;
  double max = fmaxk(x, y), d = max;

  if (max < DBL_MIN) { n *= 1ULL << 54; d *= 1ULL << 54; }
  Sleef_double2 t = dddiv_d2_d2_d2(dd(n, 0), dd(d, 0));
  t = ddmul_d2_d2_d(ddsqrt_d2_d2(ddadd2_d2_d2_d(ddsqu_d2_d2(t), 1)), max);
  double ret = t.x + t.y;
  if (xisnan(ret)) ret = SLEEF_INFINITY;
  if (min == 0) ret = max;
  if (xisnan(x) || xisnan(y)) ret = SLEEF_NAN;
  if (x == SLEEF_INFINITY || y == SLEEF_INFINITY) ret = SLEEF_INFINITY;
  return ret;
}

// xhypot_u35: plain-double version of the same formula,
// max * SQRT(1 + (min / max)^2), with no pre-scaling.
// Overrides, applied in order (later ones win):
//   min == 0              -> max
//   x or y is NaN         -> NaN
//   x or y is infinite    -> +inf (also when the other argument is NaN)
EXPORT CONST double xhypot_u35(double x, double y) {
  x = fabsk(x);
  y = fabsk(y);
  double min = fmink(x, y);
  double max = fmaxk(x, y);

  double t = min / max;
  double ret = max * SQRT(1 + t*t);
  if (min == 0) ret = max;
  if (xisnan(x) || xisnan(y)) ret = SLEEF_NAN;
  if (x == SLEEF_INFINITY || y == SLEEF_INFINITY) ret = SLEEF_INFINITY;
  return ret;
}

// xnextafter: next representable double after x in the direction of y,
// obtained by stepping the 64-bit pattern of x by one.
//
// A zero x first takes the sign of y. The flag c records whether the step
// must be taken on the mirrored representation; in that case the pattern is
// transformed before and after the decrement. The pattern is only changed
// when x != y.
// Overrides at the end: a zero result keeps the sign of a non-zero x,
// x == 0 && y == 0 returns y, and a NaN in either argument returns NaN.
EXPORT CONST double xnextafter(double x, double y) {
  union {
    double f;
    int64_t i;
  } cx;

  x = x == 0 ?
mulsign(0, y) : x;
  cx.f = x;
  // c is set when the sign bit of x and the direction (y < x) agree.
  int c = (cx.i < 0) == (y < x);
  if (c) cx.i = -(cx.i ^ (1ULL << 63));

  if (x != y) cx.i--;

  if (c) cx.i = -(cx.i ^ (1ULL << 63));

  if (cx.f == 0 && x != 0) cx.f = mulsign(0, x);
  if (x == 0 && y == 0) cx.f = y;
  if (xisnan(x) || xisnan(y)) cx.f = SLEEF_NAN;

  return cx.f;
}

// xfrfrexp: fraction part of a frexp-style decomposition.
//
// Inputs with |x| < DBL_MIN are first multiplied by 2^63. The exponent field
// of the bit pattern is then cleared and replaced with 0x3fe, which leaves
// the sign and mantissa bits and yields a magnitude in [0.5, 1).
// Infinite input returns infinity with the sign of x; zero is returned
// unchanged.
EXPORT CONST double xfrfrexp(double x) {
  union {
    double f;
    uint64_t u;
  } cx;

  if (fabsk(x) < DBL_MIN) x *= (1ULL << 63);

  cx.f = x;
  cx.u &= ~0x7ff0000000000000ULL;
  cx.u |= 0x3fe0000000000000ULL;

  if (xisinf(x)) cx.f = mulsign(SLEEF_INFINITY, x);
  if (x == 0) cx.f = x;

  return cx.f;
}

// xexpfrexp: exponent part of a frexp-style decomposition.
//
// Inputs with |x| < DBL_MIN are first multiplied by 2^63 and the result is
// compensated by -63. The exponent is the biased exponent field minus 0x3fe.
// Zero, NaN and infinite inputs return 0.
EXPORT CONST int xexpfrexp(double x) {
  union {
    double f;
    uint64_t u;
  } cx;

  int ret = 0;

  if (fabsk(x) < DBL_MIN) { x *= (1ULL << 63); ret = -63; }

  cx.f = x;
  ret += (int32_t)(((cx.u >> 52) & 0x7ff)) - 0x3fe;

  if (x == 0 || xisnan(x) || xisinf(x)) ret = 0;

  return ret;
}

// toward0: returns d unchanged... except that a non-zero d has its 64-bit
// pattern decremented by one, which moves its magnitude one step toward zero.
// A zero input returns 0.
static INLINE CONST double toward0(double d) {
  return d == 0 ? 0 : longBitsToDouble(doubleToRawLongBits(d)-1);
}

// removelsb: clears the lowest bit of the 64-bit pattern of d.
static INLINE CONST double removelsb(double d) {
  return longBitsToDouble(doubleToRawLongBits(d) & 0xfffffffffffffffeLL);
}

// ptrunc: truncation toward zero without the sign fix-up or the infinity
// check done by the exported version. Multiples of 2^31 are removed first so
// the int32_t conversions stay in range; |x| >= 2^52 is returned unchanged.
static INLINE CONST double ptrunc(double x) {
  double fr = mla(-(double)(1LL << 31), (int32_t)(x * (1.0 / (1LL << 31))), x);
  return fabsk(x) >= (double)(1LL << 52) ? x : (x - (fr - (int32_t)fr));
}

// xfmod: floating-point remainder of x / y with the sign of x.
//
// Works on nu = |x| and de = |y|. When de is below DBL_MIN both are scaled by
// 2^54 and the result is scaled back by s. The remainder r is kept as a
// double-double and reduced iteratively: each pass subtracts q * de, where q
// is an under-estimate of r / de (rounded toward zero, truncated to an
// integer, lowest bit cleared), or exactly 1 when de <= r < 2*de. The loop
// stops as soon as r.x < de and runs at most 21 times.
// Overrides at the end: a remainder equal to de becomes 0, nu < de returns x
// unchanged, and de == 0 returns NaN.
EXPORT CONST double xfmod(double x, double y) {
  double nu = fabsk(x), de = fabsk(y), s = 1, q;
  if (de < DBL_MIN) { nu *= 1ULL << 54; de *= 1ULL << 54; s = 1.0 / (1ULL << 54); }
  Sleef_double2 r = dd(nu, 0);
  // Reciprocal rounded toward zero so that q never over-estimates.
  double rde = toward0(1.0 / de);

  for(int i=0;i < 21;i++) { // ceil(log2(DBL_MAX) / 51) + 1
    q = (de+de > r.x && r.x >= de) ?
1 : (toward0(r.x) * rde); r = ddnormalize_d2_d2(ddadd2_d2_d2_d2(r, ddmul_d2_d_d(removelsb(ptrunc(q)), -de))); if (r.x < de) break; } double ret = r.x * s; if (r.x + r.y == de) ret = 0; ret = mulsign(ret, x); if (nu < de) ret = x; if (de == 0) ret = SLEEF_NAN; return ret; } EXPORT CONST Sleef_double2 xmodf(double x) { double fr = x - (double)(1LL << 31) * (int32_t)(x * (1.0 / (1LL << 31))); fr = fr - (int32_t)fr; fr = fabsk(x) >= (double)(1LL << 52) ? 0 : fr; Sleef_double2 ret = { copysignk(fr, x), copysignk(x - fr, x) }; return ret; } typedef struct { Sleef_double2 a, b; } dd2; static CONST dd2 gammak(double a) { Sleef_double2 clc = dd(0, 0), clln = dd(1, 0), clld = dd(1, 0), v = dd(1, 0), x, y, z; double t, u; int otiny = fabsk(a) < 1e-306, oref = a < 0.5; x = otiny ? dd(0, 0) : (oref ? ddadd2_d2_d_d(1, -a) : dd(a, 0)); int o0 = (0.5 <= x.x && x.x <= 1.1), o2 = 2.3 < x.x; y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 1), x)); y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 2), y)); y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 3), y)); y = ddnormalize_d2_d2(ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, 4), y)); clln = (o2 && x.x <= 7) ? y : clln; x = (o2 && x.x <= 7) ? ddadd2_d2_d2_d(x, 5) : x; t = o2 ? (1.0 / x.x) : ddnormalize_d2_d2(ddadd2_d2_d2_d(x, o0 ? -1 : -2)).x; u = o2 ? -156.801412704022726379848862 : (o0 ? +0.2947916772827614196e+2 : +0.7074816000864609279e-7); u = mla(u, t, o2 ? +1.120804464289911606838558160000 : (o0 ? +0.1281459691827820109e+3 : +0.4009244333008730443e-6)); u = mla(u, t, o2 ? +13.39798545514258921833306020000 : (o0 ? +0.2617544025784515043e+3 : +0.1040114641628246946e-5)); u = mla(u, t, o2 ? -0.116546276599463200848033357000 : (o0 ? +0.3287022855685790432e+3 : +0.1508349150733329167e-5)); u = mla(u, t, o2 ? -1.391801093265337481495562410000 : (o0 ? +0.2818145867730348186e+3 : +0.1288143074933901020e-5)); u = mla(u, t, o2 ? +0.015056113040026424412918973400 : (o0 ? 
+0.1728670414673559605e+3 : +0.4744167749884993937e-6)); u = mla(u, t, o2 ? +0.179540117061234856098844714000 : (o0 ? +0.7748735764030416817e+2 : -0.6554816306542489902e-7)); u = mla(u, t, o2 ? -0.002481743600264997730942489280 : (o0 ? +0.2512856643080930752e+2 : -0.3189252471452599844e-6)); u = mla(u, t, o2 ? -0.029527880945699120504851034100 : (o0 ? +0.5766792106140076868e+1 : +0.1358883821470355377e-6)); u = mla(u, t, o2 ? +0.000540164767892604515196325186 : (o0 ? +0.7270275473996180571e+0 : -0.4343931277157336040e-6)); u = mla(u, t, o2 ? +0.006403362833808069794787256200 : (o0 ? +0.8396709124579147809e-1 : +0.9724785897406779555e-6)); u = mla(u, t, o2 ? -0.000162516262783915816896611252 : (o0 ? -0.8211558669746804595e-1 : -0.2036886057225966011e-5)); u = mla(u, t, o2 ? -0.001914438498565477526465972390 : (o0 ? +0.6828831828341884458e-1 : +0.4373363141819725815e-5)); u = mla(u, t, o2 ? +7.20489541602001055898311517e-05 : (o0 ? -0.7712481339961671511e-1 : -0.9439951268304008677e-5)); u = mla(u, t, o2 ? +0.000839498720672087279971000786 : (o0 ? +0.8337492023017314957e-1 : +0.2050727030376389804e-4)); u = mla(u, t, o2 ? -5.17179090826059219329394422e-05 : (o0 ? -0.9094964931456242518e-1 : -0.4492620183431184018e-4)); u = mla(u, t, o2 ? -0.000592166437353693882857342347 : (o0 ? +0.1000996313575929358e+0 : +0.9945751236071875931e-4)); u = mla(u, t, o2 ? +6.97281375836585777403743539e-05 : (o0 ? -0.1113342861544207724e+0 : -0.2231547599034983196e-3)); u = mla(u, t, o2 ? +0.000784039221720066627493314301 : (o0 ? +0.1255096673213020875e+0 : +0.5096695247101967622e-3)); u = mla(u, t, o2 ? -0.000229472093621399176949318732 : (o0 ? -0.1440498967843054368e+0 : -0.1192753911667886971e-2)); u = mla(u, t, o2 ? -0.002681327160493827160473958490 : (o0 ? +0.1695571770041949811e+0 : +0.2890510330742210310e-2)); u = mla(u, t, o2 ? +0.003472222222222222222175164840 : (o0 ? -0.2073855510284092762e+0 : -0.7385551028674461858e-2)); u = mla(u, t, o2 ? 
+0.083333333333333333335592087900 : (o0 ? +0.2705808084277815939e+0 : +0.2058080842778455335e-1)); y = ddmul_d2_d2_d2(ddadd2_d2_d2_d(x, -0.5), logk2(x)); y = ddadd2_d2_d2_d2(y, ddneg_d2_d2(x)); y = ddadd2_d2_d2_d2(y, dd(0.91893853320467278056, -3.8782941580672414498e-17)); // 0.5*log(2*M_PI) z = ddadd2_d2_d2_d(ddmul_d2_d_d (u, t), o0 ? -0.4006856343865314862e+0 : -0.6735230105319810201e-1); z = ddadd2_d2_d2_d(ddmul_d2_d2_d(z, t), o0 ? +0.8224670334241132030e+0 : +0.3224670334241132030e+0); z = ddadd2_d2_d2_d(ddmul_d2_d2_d(z, t), o0 ? -0.5772156649015328655e+0 : +0.4227843350984671345e+0); z = ddmul_d2_d2_d(z, t); clc = o2 ? y : z; clld = o2 ? ddadd2_d2_d2_d(ddmul_d2_d_d(u, t), 1) : clld; y = clln; clc = otiny ? dd(83.1776616671934334590333, 3.67103459631568507221878e-15) : // log(2^120) (oref ? ddadd2_d2_d2_d2(dd(1.1447298858494001639, 1.026595116270782638e-17), ddneg_d2_d2(clc)) : clc); // log(M_PI) clln = otiny ? dd(1, 0) : (oref ? clln : clld); if (oref) x = ddmul_d2_d2_d2(clld, sinpik(a - (double)(1LL << 28) * (int32_t)(a * (1.0 / (1LL << 28))))); clld = otiny ? dd(a*((1LL << 60)*(double)(1LL << 60)), 0) : (oref ? x : y); dd2 ret = { clc, dddiv_d2_d2_d2(clln, clld) }; return ret; } EXPORT CONST double xtgamma_u1(double a) { dd2 d = gammak(a); Sleef_double2 y = ddmul_d2_d2_d2(expk2(d.a), d.b); double r = y.x + y.y; r = (a == -SLEEF_INFINITY || (a < 0 && xisint(a)) || (xisnumber(a) && a < 0 && xisnan(r))) ? SLEEF_NAN : r; r = ((a == SLEEF_INFINITY || xisnumber(a)) && a >= -DBL_MIN && (a == 0 || a > 200 || xisnan(r))) ? mulsign(SLEEF_INFINITY, a) : r; return r; } EXPORT CONST double xlgamma_u1(double a) { dd2 d = gammak(a); Sleef_double2 y = ddadd2_d2_d2_d2(d.a, logk2(ddabs_d2_d2(d.b))); double r = y.x + y.y; r = (xisinf(a) || (a <= 0 && xisint(a)) || (xisnumber(a) && xisnan(r))) ? SLEEF_INFINITY : r; return r; } EXPORT CONST double xerf_u1(double a) { double s = a, t, u; Sleef_double2 d; a = fabsk(a); int o0 = a < 1.0, o1 = a < 3.7, o2 = a < 6.0; u = o0 ? 
(a*a) : a; t = o0 ? +0.6801072401395392157e-20 : o1 ? +0.2830954522087717660e-13 : -0.5846750404269610493e-17; t = mla(t, u, o0 ? -0.2161766247570056391e-18 : o1 ? -0.1509491946179481940e-11 : +0.6076691048812607898e-15); t = mla(t, u, o0 ? +0.4695919173301598752e-17 : o1 ? +0.3827857177807173152e-10 : -0.3007518609604893831e-13); t = mla(t, u, o0 ? -0.9049140419888010819e-16 : o1 ? -0.6139733921558987241e-09 : +0.9427906260824646063e-12); t = mla(t, u, o0 ? +0.1634018903557411517e-14 : o1 ? +0.6985387934608038824e-08 : -0.2100110908269393629e-10); t = mla(t, u, o0 ? -0.2783485786333455216e-13 : o1 ? -0.5988224513034371474e-07 : +0.3534639523461223473e-09); t = mla(t, u, o0 ? +0.4463221276786412722e-12 : o1 ? +0.4005716952355346640e-06 : -0.4664967728285395926e-08); t = mla(t, u, o0 ? -0.6711366622850138987e-11 : o1 ? -0.2132190104575784400e-05 : +0.4943823283769000532e-07); t = mla(t, u, o0 ? +0.9422759050232658346e-10 : o1 ? +0.9092461304042630325e-05 : -0.4271203394761148254e-06); t = mla(t, u, o0 ? -0.1229055530100228477e-08 : o1 ? -0.3079188080966205457e-04 : +0.3034067677404915895e-05); t = mla(t, u, o0 ? +0.1480719281585085023e-07 : o1 ? +0.7971413443082370762e-04 : -0.1776295289066871135e-04); t = mla(t, u, o0 ? -0.1636584469123402714e-06 : o1 ? -0.1387853215225442864e-03 : +0.8524547630559505050e-04); t = mla(t, u, o0 ? +0.1646211436588923363e-05 : o1 ? +0.6469678026257590965e-04 : -0.3290582944961784398e-03); t = mla(t, u, o0 ? -0.1492565035840624866e-04 : o1 ? +0.4996645280372945860e-03 : +0.9696966068789101157e-03); t = mla(t, u, o0 ? +0.1205533298178966496e-03 : o1 ? -0.1622802482842520535e-02 : -0.1812527628046986137e-02); t = mla(t, u, o0 ? -0.8548327023450851166e-03 : o1 ? +0.1615320557049377171e-03 : -0.4725409828123619017e-03); t = mla(t, u, o0 ? +0.5223977625442188799e-02 : o1 ? +0.1915262325574875607e-01 : +0.2090315427924229266e-01); t = mla(t, u, o0 ? -0.2686617064513125569e-01 : o1 ? 
-0.1027818298486033455e+00 : -0.1052041921842776645e+00); t = mla(t, u, o0 ? +0.1128379167095512753e+00 : o1 ? -0.6366172819842503827e+00 : -0.6345351808766568347e+00); t = mla(t, u, o0 ? -0.3761263890318375380e+00 : o1 ? -0.1128379590648910469e+01 : -0.1129442929103524396e+01); d = ddmul_d2_d_d(t, u); d = ddadd2_d2_d2_d2(d, o0 ? dd(1.1283791670955125586, 1.5335459613165822674e-17) : o1 ? dd(3.4110644736196137587e-08, -2.4875650708323294246e-24) : dd(0.00024963035690526438285, -5.4362665034856259795e-21)); d = o0 ? ddmul_d2_d2_d(d, a) : ddadd_d2_d_d2(1.0, ddneg_d2_d2(expk2(d))); u = mulsign(o2 ? (d.x + d.y) : 1, s); u = xisnan(a) ? SLEEF_NAN : u; return u; } EXPORT CONST double xerfc_u15(double a) { double s = a, r = 0, t; Sleef_double2 u, d, x; a = fabsk(a); int o0 = a < 1.0, o1 = a < 2.2, o2 = a < 4.2, o3 = a < 27.3; u = o0 ? ddmul_d2_d_d(a, a) : o1 ? dd(a, 0) : dddiv_d2_d2_d2(dd(1, 0), dd(a, 0)); t = o0 ? +0.6801072401395386139e-20 : o1 ? +0.3438010341362585303e-12 : o2 ? -0.5757819536420710449e+2 : +0.2334249729638701319e+5; t = mla(t, u.x, o0 ? -0.2161766247570055669e-18 : o1 ? -0.1237021188160598264e-10 : o2 ? +0.4669289654498104483e+3 : -0.4695661044933107769e+5); t = mla(t, u.x, o0 ? +0.4695919173301595670e-17 : o1 ? +0.2117985839877627852e-09 : o2 ? -0.1796329879461355858e+4 : +0.3173403108748643353e+5); t = mla(t, u.x, o0 ? -0.9049140419888007122e-16 : o1 ? -0.2290560929177369506e-08 : o2 ? +0.4355892193699575728e+4 : +0.3242982786959573787e+4); t = mla(t, u.x, o0 ? +0.1634018903557410728e-14 : o1 ? +0.1748931621698149538e-07 : o2 ? -0.7456258884965764992e+4 : -0.2014717999760347811e+5); t = mla(t, u.x, o0 ? -0.2783485786333451745e-13 : o1 ? -0.9956602606623249195e-07 : o2 ? +0.9553977358167021521e+4 : +0.1554006970967118286e+5); t = mla(t, u.x, o0 ? +0.4463221276786415752e-12 : o1 ? +0.4330010240640327080e-06 : o2 ? -0.9470019905444229153e+4 : -0.6150874190563554293e+4); t = mla(t, u.x, o0 ? -0.6711366622850136563e-11 : o1 ? 
-0.1435050600991763331e-05 : o2 ? +0.7387344321849855078e+4 : +0.1240047765634815732e+4); t = mla(t, u.x, o0 ? +0.9422759050232662223e-10 : o1 ? +0.3460139479650695662e-05 : o2 ? -0.4557713054166382790e+4 : -0.8210325475752699731e+2); t = mla(t, u.x, o0 ? -0.1229055530100229098e-08 : o1 ? -0.4988908180632898173e-05 : o2 ? +0.2207866967354055305e+4 : +0.3242443880839930870e+2); t = mla(t, u.x, o0 ? +0.1480719281585086512e-07 : o1 ? -0.1308775976326352012e-05 : o2 ? -0.8217975658621754746e+3 : -0.2923418863833160586e+2); t = mla(t, u.x, o0 ? -0.1636584469123399803e-06 : o1 ? +0.2825086540850310103e-04 : o2 ? +0.2268659483507917400e+3 : +0.3457461732814383071e+0); t = mla(t, u.x, o0 ? +0.1646211436588923575e-05 : o1 ? -0.6393913713069986071e-04 : o2 ? -0.4633361260318560682e+2 : +0.5489730155952392998e+1); t = mla(t, u.x, o0 ? -0.1492565035840623511e-04 : o1 ? -0.2566436514695078926e-04 : o2 ? +0.9557380123733945965e+1 : +0.1559934132251294134e-2); t = mla(t, u.x, o0 ? +0.1205533298178967851e-03 : o1 ? +0.5895792375659440364e-03 : o2 ? -0.2958429331939661289e+1 : -0.1541741566831520638e+1); t = mla(t, u.x, o0 ? -0.8548327023450850081e-03 : o1 ? -0.1695715579163588598e-02 : o2 ? +0.1670329508092765480e+0 : +0.2823152230558364186e-5); t = mla(t, u.x, o0 ? +0.5223977625442187932e-02 : o1 ? +0.2089116434918055149e-03 : o2 ? +0.6096615680115419211e+0 : +0.6249999184195342838e+0); t = mla(t, u.x, o0 ? -0.2686617064513125222e-01 : o1 ? +0.1912855949584917753e-01 : o2 ? +0.1059212443193543585e-2 : +0.1741749416408701288e-8); d = ddmul_d2_d2_d(u, t); d = ddadd2_d2_d2_d2(d, o0 ? dd(0.11283791670955126141, -4.0175691625932118483e-18) : o1 ? dd(-0.10277263343147646779, -6.2338714083404900225e-18) : o2 ? dd(-0.50005180473999022439, 2.6362140569041995803e-17) : dd(-0.5000000000258444377, -4.0074044712386992281e-17)); d = ddmul_d2_d2_d2(d, u); d = ddadd2_d2_d2_d2(d, o0 ? dd(-0.37612638903183753802, 1.3391897206042552387e-17) : o1 ? 
dd(-0.63661976742916359662, 7.6321019159085724662e-18) : o2 ? dd(1.601106273924963368e-06, 1.1974001857764476775e-23) : dd(2.3761973137523364792e-13, -1.1670076950531026582e-29)); d = ddmul_d2_d2_d2(d, u); d = ddadd2_d2_d2_d2(d, o0 ? dd(1.1283791670955125586, 1.5335459613165822674e-17) : o1 ? dd(-1.1283791674717296161, 8.0896847755965377194e-17) : o2 ? dd(-0.57236496645145429341, 3.0704553245872027258e-17) : dd(-0.57236494292470108114, -2.3984352208056898003e-17)); x = ddmul_d2_d2_d(o1 ? d : dd(-a, 0), a); x = o1 ? x : ddadd2_d2_d2_d2(x, d); x = o0 ? ddsub_d2_d2_d2(dd(1, 0), x) : expk2(x); x = o1 ? x : ddmul_d2_d2_d2(x, u); r = o3 ? (x.x + x.y) : 0; if (s < 0) r = 2 - r; r = xisnan(s) ? SLEEF_NAN : r; return r; } #ifdef ENABLE_MAIN // gcc -w -DENABLE_MAIN -I../common sleefdp.c -lm #include int main(int argc, char **argv) { double d1 = atof(argv[1]); printf("arg1 = %.20g\n", d1); //int i1 = atoi(argv[1]); //double d2 = atof(argv[2]); //printf("arg2 = %.20g\n", d2); //printf("%d\n", (int)d2); #if 0 double d3 = atof(argv[3]); printf("arg3 = %.20g\n", d3); #endif //printf("%g\n", pow2i(i1)); //int exp = xexpfrexp(d1); //double r = xnextafter(d1, d2); //double r = xfma(d1, d2, d3); printf("test = %.20g\n", xcos_u1(d1)); //printf("test = %.20g\n", xlog(d1)); //r = nextafter(d1, d2); printf("corr = %.20g\n", cos(d1)); //printf("%.20g %.20g\n", xround(d1), xrint(d1)); //Sleef_double2 r = xsincospi_u35(d); //printf("%g, %g\n", (double)r.x, (double)r.y); } #endif sleef-3.3.1/src/libm/sleefld.c000066400000000000000000000243541333715643700161340ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. 
#include #include #include #include #include "misc.h" #ifdef DORENAME #include "rename.h" #endif #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif static INLINE CONST long double mlal(long double x, long double y, long double z) { return x * y + z; } static INLINE CONST long double xrintl(long double x) { return x < 0 ? (int)(x - 0.5) : (int)(x + 0.5); } static INLINE CONST int64_t xceill(long double x) { return (int64_t)x + (x < 0 ? 0 : 1); } static INLINE CONST long double xtruncl(long double x) { return (long double)(int)x; } static INLINE CONST int xisnanl(long double x) { return x != x; } static INLINE CONST int xisinfl(long double x) { return x == SLEEF_INFINITYl || x == -SLEEF_INFINITYl; } static INLINE CONST int xisminfl(long double x) { return x == -SLEEF_INFINITYl; } static INLINE CONST int xispinfl(long double x) { return x == SLEEF_INFINITYl; } static INLINE CONST long double xfabsl(long double x) { return x >= 0 ? x : -x; } // #ifndef NDEBUG static int checkfp(long double x) { if (xisinfl(x) || xisnanl(x)) return 1; return 0; } #endif static INLINE CONST long double upperl(long double d) { union { long double ld; uint32_t u[4]; } cnv; cnv.ld = d; cnv.u[0] = 0; return cnv.ld; } static INLINE CONST Sleef_longdouble2 dl(long double h, long double l) { Sleef_longdouble2 ret; ret.x = h; ret.y = l; return ret; } static INLINE CONST Sleef_longdouble2 dlnormalize_l2_l2(Sleef_longdouble2 t) { Sleef_longdouble2 s; s.x = t.x + t.y; s.y = t.x - s.x + t.y; return s; } static INLINE CONST Sleef_longdouble2 dlscale_l2_l2_l(Sleef_longdouble2 d, long double s) { Sleef_longdouble2 r; r.x = d.x * s; r.y = d.y * s; return r; } static INLINE CONST Sleef_longdouble2 dlneg_l2_l2(Sleef_longdouble2 d) { Sleef_longdouble2 r; r.x = -d.x; r.y = -d.y; return r; } static INLINE CONST Sleef_longdouble2 dladd_l2_l_l(long double x, long double y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y) || xfabsl(x) >= xfabsl(y))) { fprintf(stderr, 
"[dladd_l2_l_l : %Lg, %Lg]\n", x, y); fflush(stderr); } #endif r.x = x + y; r.y = x - r.x + y; return r; } static INLINE CONST Sleef_longdouble2 dladd2_l2_l_l(long double x, long double y) { Sleef_longdouble2 r; r.x = x + y; long double v = r.x - x; r.y = (x - (r.x - v)) + (y - v); return r; } static INLINE CONST Sleef_longdouble2 dladd_l2_l2_l(Sleef_longdouble2 x, long double y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y) || xfabsl(x.x) >= xfabsl(y))) { fprintf(stderr, "[dladd_l2_l2_l : %Lg %Lg]\n", x.x, y); fflush(stderr); } #endif r.x = x.x + y; r.y = x.x - r.x + y + x.y; return r; } static INLINE CONST Sleef_longdouble2 dladd2_l2_l2_l(Sleef_longdouble2 x, long double y) { // |x| >= |y| Sleef_longdouble2 r; r.x = x.x + y; long double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y - v); r.y += x.y; return r; } static INLINE CONST Sleef_longdouble2 dladd_l2_l_l2(long double x, Sleef_longdouble2 y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y.x) || xfabsl(x) >= xfabsl(y.x))) { fprintf(stderr, "[dladd_l2_l_l2 : %Lg %Lg]\n", x, y.x); fflush(stderr); } #endif r.x = x + y.x; r.y = x - r.x + y.x + y.y; return r; } static INLINE CONST Sleef_longdouble2 dladd2_l2_l_l2(long double x, Sleef_longdouble2 y) { Sleef_longdouble2 r; r.x = x + y.x; long double v = r.x - x; r.y = (x - (r.x - v)) + (y.x - v) + y.y; return r; } static INLINE CONST Sleef_longdouble2 dladd_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || xfabsl(x.x) >= xfabsl(y.x))) { fprintf(stderr, "[dladd_l2_l2_l2 : %Lg %Lg]\n", x.x, y.x); fflush(stderr); } #endif r.x = x.x + y.x; r.y = x.x - r.x + y.x + x.y + y.y; return r; } static INLINE CONST Sleef_longdouble2 dladd2_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { Sleef_longdouble2 r; r.x = x.x + y.x; long double v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y.x - v); r.y += x.y + y.y; return r; 
} static INLINE CONST Sleef_longdouble2 dlsub_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { // |x| >= |y| Sleef_longdouble2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || xfabsl(x.x) >= xfabsl(y.x))) { fprintf(stderr, "[dlsub_l2_l2_l2 : %Lg %Lg]\n", x.x, y.x); fflush(stderr); } #endif r.x = x.x - y.x; r.y = x.x - r.x - y.x + x.y - y.y; return r; } static INLINE CONST Sleef_longdouble2 dldiv_l2_l2_l2(Sleef_longdouble2 n, Sleef_longdouble2 d) { long double t = 1.0 / d.x; long double dh = upperl(d.x), dl = d.x - dh; long double th = upperl(t ), tl = t - th; long double nhh = upperl(n.x), nhl = n.x - nhh; Sleef_longdouble2 q; q.x = n.x * t; long double u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); q.y = t * (n.y - q.x * d.y) + u; return q; } static INLINE CONST Sleef_longdouble2 dlmul_l2_l_l(long double x, long double y) { long double xh = upperl(x), xl = x - xh; long double yh = upperl(y), yl = y - yh; Sleef_longdouble2 r; r.x = x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; return r; } static INLINE CONST Sleef_longdouble2 dlmul_l2_l2_l(Sleef_longdouble2 x, long double y) { long double xh = upperl(x.x), xl = x.x - xh; long double yh = upperl(y ), yl = y - yh; Sleef_longdouble2 r; r.x = x.x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; return r; } static INLINE CONST Sleef_longdouble2 dlmul_l2_l2_l2(Sleef_longdouble2 x, Sleef_longdouble2 y) { long double xh = upperl(x.x), xl = x.x - xh; long double yh = upperl(y.x), yl = y.x - yh; Sleef_longdouble2 r; r.x = x.x * y.x; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; return r; } static INLINE CONST Sleef_longdouble2 dlsqu_l2_l2(Sleef_longdouble2 x) { long double xh = upperl(x.x), xl = x.x - xh; Sleef_longdouble2 r; r.x = x.x * x.x; r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); return r; } static INLINE CONST Sleef_longdouble2 dlrec_l2_l(long double d) { long double t 
= 1.0 / d; long double dh = upperl(d), dl = d - dh; long double th = upperl(t), tl = t - th; Sleef_longdouble2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); return q; } static INLINE CONST Sleef_longdouble2 dlrec_l2_l2(Sleef_longdouble2 d) { long double t = 1.0 / d.x; long double dh = upperl(d.x), dl = d.x - dh; long double th = upperl(t ), tl = t - th; Sleef_longdouble2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); return q; } /* static INLINE CONST Sleef_longdouble2 dlsqrt_l2_l2(Sleef_longdouble2 d) { long double t = sqrt(d.x + d.y); return dlscale_l2_l2_l(dlmul_l2_l2_l2(dladd2_l2_l2_l2(d, dlmul_l2_l_l(t, t)), dlrec_l2_l(t)), 0.5); } */ // EXPORT CONST Sleef_longdouble2 xsincospil_u05(long double d) { long double u, s, t; Sleef_longdouble2 r, x, s2; u = d * 4; int64_t q = xceill(u) & ~(int64_t)1; s = u - (long double)q; t = s; s = s * s; s2 = dlmul_l2_l_l(t, t); // u = 4.59265607313529833157632e-17L; u = mlal(u, s, -2.04096140520547829627419e-14L); u = mlal(u, s, 6.94845264320316515640316e-12L); u = mlal(u, s, -1.75724767308629210422023e-09L); u = mlal(u, s, 3.13361689037693212744991e-07L); u = mlal(u, s, -3.65762041821772284521155e-05L); u = mlal(u, s, 0.00249039457019272015784594L); x = dladd2_l2_l_l2(u * s, dl(-0.0807455121882807817044873L, -2.40179063154839769223037e-21L)); x = dladd2_l2_l2_l2(dlmul_l2_l2_l2(s2, x), dl(0.785398163397448309628202L, -1.25420305812534448752181e-20L)); x = dlmul_l2_l2_l(x, t); r.x = x.x + x.y; // u = -2.00423964577657539380734e-18L; u = mlal(u, s, 1.00185574457758689324113e-15L); u = mlal(u, s, -3.89807283423502620989528e-13L); u = mlal(u, s, 1.15011591257563133685341e-10L); u = mlal(u, s, -2.461136950493305818105e-08L); u = mlal(u, s, 3.59086044859150791782134e-06L); u = mlal(u, s, -0.00032599188692739001335938L); x = dladd2_l2_l_l2(u * s, dl(0.0158543442438155008529635L, -6.97556143018517384674258e-22L)); x = dladd2_l2_l2_l2(dlmul_l2_l2_l2(s2, x), 
dl(-0.308425137534042456829379L, -9.19882299434302978226668e-21L)); x = dladd2_l2_l2_l(dlmul_l2_l2_l2(x, s2), 1); r.y = x.x + x.y; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (xisinfl(d)) { r.x = r.y = SLEEF_NAN; } if (!xisinfl(d) && xfabsl(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } return r; } EXPORT CONST Sleef_longdouble2 xsincospil_u35(long double d) { long double u, s, t; Sleef_longdouble2 r; u = d * 4; int64_t q = xceill(u) & ~(int64_t)1; s = u - (long double)q; t = s; s = s * s; // u = -0.2023275819380976135024e-13L; u = mlal(u, s, +0.6948176964255957574946e-11L); u = mlal(u, s, -0.1757247450021535880723e-8L); u = mlal(u, s, +0.3133616889379195970541e-6L); u = mlal(u, s, -0.3657620418215300856408e-4L); u = mlal(u, s, +0.2490394570192717262476e-2L); u = mlal(u, s, -0.8074551218828078160284e-1L); u = mlal(u, s, +0.7853981633974483096282e+0L); r.x = u * t; // u = +0.9933418221428971922705e-15L; u = mlal(u, s, -0.3897923064055824005357e-12L); u = mlal(u, s, +0.1150115771521792692066e-9L); u = mlal(u, s, -0.2461136949725905367314e-7L); u = mlal(u, s, +0.3590860448589084195081e-5L); u = mlal(u, s, -0.3259918869273895914840e-3L); u = mlal(u, s, +0.1585434424381550079706e-1L); u = mlal(u, s, -0.3084251375340424568294e+0L); u = mlal(u, s, 1.0L); r.y = u; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (xisinfl(d)) { r.x = r.y = SLEEF_NAN; } if (!xisinfl(d) && xfabsl(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } return r; } sleef-3.3.1/src/libm/sleeflibm_footer.h.org000066400000000000000000000001111333715643700206120ustar00rootroot00000000000000#ifdef __cplusplus } #endif #undef IMPORT #endif // #ifndef __SLEEF_H__ sleef-3.3.1/src/libm/sleeflibm_header.h.org000066400000000000000000000211611333715643700205540ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. 
// Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) #ifndef __SLEEF_H__ #define __SLEEF_H__ #include #include #if (defined(__GNUC__) || defined(__CLANG__)) && !defined(__INTEL_COMPILER) #define CONST const #else #define CONST #endif #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllexport) #else // #ifdef IMPORT_IS_EXPORT #define IMPORT __declspec(dllimport) #if (defined(_MSC_VER)) #pragma comment(lib,"sleef.lib") #endif // #if (defined(_MSC_VER)) #endif // #ifdef IMPORT_IS_EXPORT #else // #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #define IMPORT #endif // #if (defined(__MINGW32__) || defined(__MINGW64__) || defined(__CYGWIN__) || defined(_MSC_VER)) && !defined(SLEEF_STATIC_LIBS) #if (defined(__GNUC__) || defined(__CLANG__)) && (defined(__i386__) || defined(__x86_64__)) #include #endif #if (defined(_MSC_VER)) #include #endif #if defined(__ARM_NEON__) || defined(__ARM_NEON) #include #endif #if defined(__ARM_FEATURE_SVE) #include #endif // #ifndef SLEEF_FP_ILOGB0 #define SLEEF_FP_ILOGB0 ((int)-2147483648) #endif #ifndef SLEEF_FP_ILOGBNAN #define SLEEF_FP_ILOGBNAN ((int)2147483647) #endif // IMPORT void *Sleef_malloc(size_t z); IMPORT void Sleef_free(void *ptr); IMPORT uint64_t Sleef_currentTimeMicros(); #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) IMPORT void Sleef_x86CpuID(int32_t out[4], uint32_t eax, uint32_t ecx); #endif // #ifndef Sleef_double2_DEFINED #define Sleef_double2_DEFINED typedef struct { double x, y; } Sleef_double2; #endif #ifndef Sleef_float2_DEFINED #define Sleef_float2_DEFINED typedef struct { float x, y; } Sleef_float2; #endif #ifndef Sleef_longdouble2_DEFINED #define Sleef_longdouble2_DEFINED typedef struct { long 
double x, y; } Sleef_longdouble2; #endif #if defined(ENABLEFLOAT128) && !defined(Sleef_quad2_DEFINED) #define Sleef_quad2_DEFINED typedef __float128 Sleef_quad; typedef struct { __float128 x, y; } Sleef_quad2; #endif #ifdef __cplusplus extern "C" { #endif IMPORT CONST double Sleef_sin_u35(double); IMPORT CONST double Sleef_cos_u35(double); IMPORT CONST Sleef_double2 Sleef_sincos_u35(double); IMPORT CONST double Sleef_tan_u35(double); IMPORT CONST double Sleef_asin_u35(double); IMPORT CONST double Sleef_acos_u35(double); IMPORT CONST double Sleef_atan_u35(double); IMPORT CONST double Sleef_atan2_u35(double, double); IMPORT CONST double Sleef_log_u35(double); IMPORT CONST double Sleef_cbrt_u35(double); IMPORT CONST double Sleef_sin_u10(double); IMPORT CONST double Sleef_cos_u10(double); IMPORT CONST Sleef_double2 Sleef_sincos_u10(double); IMPORT CONST double Sleef_tan_u10(double); IMPORT CONST double Sleef_asin_u10(double); IMPORT CONST double Sleef_acos_u10(double); IMPORT CONST double Sleef_atan_u10(double); IMPORT CONST double Sleef_atan2_u10(double, double); IMPORT CONST double Sleef_log_u10(double); IMPORT CONST double Sleef_cbrt_u10(double); IMPORT CONST double Sleef_exp_u10(double); IMPORT CONST double Sleef_pow_u10(double, double); IMPORT CONST double Sleef_sinh_u10(double); IMPORT CONST double Sleef_cosh_u10(double); IMPORT CONST double Sleef_tanh_u10(double); IMPORT CONST double Sleef_sinh_u35(double); IMPORT CONST double Sleef_cosh_u35(double); IMPORT CONST double Sleef_tanh_u35(double); IMPORT CONST double Sleef_asinh_u10(double); IMPORT CONST double Sleef_acosh_u10(double); IMPORT CONST double Sleef_atanh_u10(double); IMPORT CONST double Sleef_exp2_u10(double); IMPORT CONST double Sleef_exp10_u10(double); IMPORT CONST double Sleef_expm1_u10(double); IMPORT CONST double Sleef_log10_u10(double); IMPORT CONST double Sleef_log2_u10(double); IMPORT CONST double Sleef_log1p_u10(double); IMPORT CONST Sleef_double2 Sleef_sincospi_u05(double); IMPORT CONST 
Sleef_double2 Sleef_sincospi_u35(double); IMPORT CONST double Sleef_sinpi_u05(double); IMPORT CONST double Sleef_cospi_u05(double); IMPORT CONST double Sleef_ldexp(double, int); IMPORT CONST int Sleef_ilogb(double); IMPORT CONST double Sleef_fma(double, double, double); IMPORT CONST double Sleef_sqrt(double); IMPORT CONST double Sleef_sqrt_u05(double); IMPORT CONST double Sleef_sqrt_u35(double); IMPORT CONST double Sleef_hypot_u05(double, double); IMPORT CONST double Sleef_hypot_u35(double, double); IMPORT CONST double Sleef_fabs(double); IMPORT CONST double Sleef_copysign(double, double); IMPORT CONST double Sleef_fmax(double, double); IMPORT CONST double Sleef_fmin(double, double); IMPORT CONST double Sleef_fdim(double, double); IMPORT CONST double Sleef_trunc(double); IMPORT CONST double Sleef_floor(double); IMPORT CONST double Sleef_ceil(double); IMPORT CONST double Sleef_round(double); IMPORT CONST double Sleef_rint(double); IMPORT CONST double Sleef_nextafter(double, double); IMPORT CONST double Sleef_frfrexp(double); IMPORT CONST int Sleef_expfrexp(double); IMPORT CONST double Sleef_fmod(double, double); IMPORT CONST Sleef_double2 Sleef_modf(double); IMPORT CONST double Sleef_lgamma_u10(double); IMPORT CONST double Sleef_tgamma_u10(double); IMPORT CONST double Sleef_erf_u10(double); IMPORT CONST double Sleef_erfc_u15(double); IMPORT CONST float Sleef_sinf_u35(float); IMPORT CONST float Sleef_cosf_u35(float); IMPORT CONST Sleef_float2 Sleef_sincosf_u35(float); IMPORT CONST float Sleef_tanf_u35(float); IMPORT CONST float Sleef_asinf_u35(float); IMPORT CONST float Sleef_acosf_u35(float); IMPORT CONST float Sleef_atanf_u35(float); IMPORT CONST float Sleef_atan2f_u35(float, float); IMPORT CONST float Sleef_logf_u35(float); IMPORT CONST float Sleef_cbrtf_u35(float); IMPORT CONST float Sleef_sinf_u10(float); IMPORT CONST float Sleef_cosf_u10(float); IMPORT CONST Sleef_float2 Sleef_sincosf_u10(float); IMPORT CONST float Sleef_tanf_u10(float); IMPORT CONST float 
Sleef_asinf_u10(float); IMPORT CONST float Sleef_acosf_u10(float); IMPORT CONST float Sleef_atanf_u10(float); IMPORT CONST float Sleef_atan2f_u10(float, float); IMPORT CONST float Sleef_logf_u10(float); IMPORT CONST float Sleef_cbrtf_u10(float); IMPORT CONST float Sleef_expf_u10(float); IMPORT CONST float Sleef_powf_u10(float, float); IMPORT CONST float Sleef_sinhf_u10(float); IMPORT CONST float Sleef_coshf_u10(float); IMPORT CONST float Sleef_tanhf_u10(float); IMPORT CONST float Sleef_sinhf_u35(float); IMPORT CONST float Sleef_coshf_u35(float); IMPORT CONST float Sleef_tanhf_u35(float); IMPORT CONST float Sleef_asinhf_u10(float); IMPORT CONST float Sleef_acoshf_u10(float); IMPORT CONST float Sleef_atanhf_u10(float); IMPORT CONST float Sleef_exp2f_u10(float); IMPORT CONST float Sleef_exp10f_u10(float); IMPORT CONST float Sleef_expm1f_u10(float); IMPORT CONST float Sleef_log10f_u10(float); IMPORT CONST float Sleef_log2f_u10(float); IMPORT CONST float Sleef_log1pf_u10(float); IMPORT CONST Sleef_float2 Sleef_sincospif_u05(float); IMPORT CONST Sleef_float2 Sleef_sincospif_u35(float); IMPORT CONST float Sleef_sinpif_u05(float d); IMPORT CONST float Sleef_cospif_u05(float d); IMPORT CONST float Sleef_ldexpf(float, int); IMPORT CONST int Sleef_ilogbf(float); IMPORT CONST float Sleef_fmaf(float, float, float); IMPORT CONST float Sleef_sqrtf(float); IMPORT CONST float Sleef_sqrtf_u05(float); IMPORT CONST float Sleef_sqrtf_u35(float); IMPORT CONST float Sleef_hypotf_u05(float, float); IMPORT CONST float Sleef_hypotf_u35(float, float); IMPORT CONST float Sleef_fabsf(float); IMPORT CONST float Sleef_copysignf(float, float); IMPORT CONST float Sleef_fmaxf(float, float); IMPORT CONST float Sleef_fminf(float, float); IMPORT CONST float Sleef_fdimf(float, float); IMPORT CONST float Sleef_truncf(float); IMPORT CONST float Sleef_floorf(float); IMPORT CONST float Sleef_ceilf(float); IMPORT CONST float Sleef_roundf(float); IMPORT CONST float Sleef_rintf(float); IMPORT CONST float 
Sleef_nextafterf(float, float); IMPORT CONST float Sleef_frfrexpf(float); IMPORT CONST int Sleef_expfrexpf(float); IMPORT CONST float Sleef_fmodf(float, float); IMPORT CONST Sleef_float2 Sleef_modff(float); IMPORT CONST float Sleef_lgammaf_u10(float); IMPORT CONST float Sleef_tgammaf_u10(float); IMPORT CONST float Sleef_erff_u10(float); IMPORT CONST float Sleef_erfcf_u15(float); IMPORT CONST Sleef_longdouble2 Sleef_sincospil_u05(long double); IMPORT CONST Sleef_longdouble2 Sleef_sincospil_u35(long double); #if defined(Sleef_quad2_DEFINED) IMPORT CONST Sleef_quad2 Sleef_sincospiq_u05(Sleef_quad); IMPORT CONST Sleef_quad2 Sleef_sincospiq_u35(Sleef_quad); #endif sleef-3.3.1/src/libm/sleefqp.c000066400000000000000000000302761333715643700161550ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2017. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #include #include #include #include #include "misc.h" #ifdef DORENAME #include "rename.h" #endif #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif static INLINE CONST Sleef_quad mlaq(Sleef_quad x, Sleef_quad y, Sleef_quad z) { return x * y + z; } static INLINE CONST int64_t xrintq(Sleef_quad x) { return x < 0 ? (int64_t)(x - 0.5) : (int64_t)(x + 0.5); } static INLINE CONST int64_t xceilq(Sleef_quad x) { return (int64_t)x + (x < 0 ? 
0 : 1); } static INLINE CONST Sleef_quad xtruncq(Sleef_quad x) { return (Sleef_quad)(int64_t)x; } static INLINE CONST int xisnanq(Sleef_quad x) { return x != x; } static INLINE CONST int xisinfq(Sleef_quad x) { return x == SLEEF_INFINITYq || x == -SLEEF_INFINITYq; } static INLINE CONST int xisminfq(Sleef_quad x) { return x == -SLEEF_INFINITYq; } static INLINE CONST int xispinfq(Sleef_quad x) { return x == SLEEF_INFINITYq; } static INLINE CONST Sleef_quad xfabsq(Sleef_quad x) { union { Sleef_quad q; uint64_t u[2]; } cnv; cnv.q = x; cnv.u[1] &= 0x7fffffffffffffffULL; return cnv.q; } // #ifndef NDEBUG static int checkfp(Sleef_quad x) { if (xisinfq(x) || xisnanq(x)) return 1; return 0; } #endif static INLINE CONST Sleef_quad upperq(Sleef_quad d) { union { Sleef_quad q; uint64_t u[2]; } cnv; cnv.q = d; cnv.u[0] &= ~((1ULL << (112/2+1)) - 1); return cnv.q; } static INLINE CONST Sleef_quad2 dq(Sleef_quad h, Sleef_quad l) { Sleef_quad2 ret; ret.x = h; ret.y = l; return ret; } static INLINE CONST Sleef_quad2 dqnormalize_q2_q2(Sleef_quad2 t) { Sleef_quad2 s; s.x = t.x + t.y; s.y = t.x - s.x + t.y; return s; } static INLINE CONST Sleef_quad2 dqscale_q2_q2_q(Sleef_quad2 d, Sleef_quad s) { Sleef_quad2 r; r.x = d.x * s; r.y = d.y * s; return r; } static INLINE CONST Sleef_quad2 dqneg_q2_q2(Sleef_quad2 d) { Sleef_quad2 r; r.x = -d.x; r.y = -d.y; return r; } static INLINE CONST Sleef_quad2 dqadd_q2_q_q(Sleef_quad x, Sleef_quad y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y) || xfabsq(x) >= xfabsq(y))) { fprintf(stderr, "[dqadd_q2_q_q : %g, %g]\n", (double)x, (double)y); fflush(stderr); } #endif r.x = x + y; r.y = x - r.x + y; return r; } static INLINE CONST Sleef_quad2 dqadd2_q2_q_q(Sleef_quad x, Sleef_quad y) { Sleef_quad2 r; r.x = x + y; Sleef_quad v = r.x - x; r.y = (x - (r.x - v)) + (y - v); return r; } static INLINE CONST Sleef_quad2 dqadd_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x.x) || 
checkfp(y) || xfabsq(x.x) >= xfabsq(y))) { fprintf(stderr, "[dqadd_q2_q2_q : %g %g]\n", (double)x.x, (double)y); fflush(stderr); } #endif r.x = x.x + y; r.y = x.x - r.x + y + x.y; return r; } static INLINE CONST Sleef_quad2 dqadd2_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { // |x| >= |y| Sleef_quad2 r; r.x = x.x + y; Sleef_quad v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y - v); r.y += x.y; return r; } static INLINE CONST Sleef_quad2 dqadd_q2_q_q2(Sleef_quad x, Sleef_quad2 y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x) || checkfp(y.x) || xfabsq(x) >= xfabsq(y.x))) { fprintf(stderr, "[dqadd_q2_q_q2 : %g %g]\n", (double)x, (double)y.x); fflush(stderr); } #endif r.x = x + y.x; r.y = x - r.x + y.x + y.y; return r; } static INLINE CONST Sleef_quad2 dqadd2_q2_q_q2(Sleef_quad x, Sleef_quad2 y) { Sleef_quad2 r; r.x = x + y.x; Sleef_quad v = r.x - x; r.y = (x - (r.x - v)) + (y.x - v) + y.y; return r; } static INLINE CONST Sleef_quad2 dqadd_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || xfabsq(x.x) >= xfabsq(y.x))) { fprintf(stderr, "[dqadd_q2_q2_q2 : %g %g]\n", (double)x.x, (double)y.x); fflush(stderr); } #endif r.x = x.x + y.x; r.y = x.x - r.x + y.x + x.y + y.y; return r; } static INLINE CONST Sleef_quad2 dqadd2_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { Sleef_quad2 r; r.x = x.x + y.x; Sleef_quad v = r.x - x.x; r.y = (x.x - (r.x - v)) + (y.x - v); r.y += x.y + y.y; return r; } static INLINE CONST Sleef_quad2 dqsub_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { // |x| >= |y| Sleef_quad2 r; #ifndef NDEBUG if (!(checkfp(x.x) || checkfp(y.x) || xfabsq(x.x) >= xfabsq(y.x))) { fprintf(stderr, "[dqsub_q2_q2_q2 : %g %g]\n", (double)x.x, (double)y.x); fflush(stderr); } #endif r.x = x.x - y.x; r.y = x.x - r.x - y.x + x.y - y.y; return r; } static INLINE CONST Sleef_quad2 dqdiv_q2_q2_q2(Sleef_quad2 n, Sleef_quad2 d) { Sleef_quad t = 1.0 / d.x; Sleef_quad dh = upperq(d.x), dl = d.x - dh; Sleef_quad th = 
upperq(t ), tl = t - th; Sleef_quad nhh = upperq(n.x), nhl = n.x - nhh; Sleef_quad2 q; q.x = n.x * t; Sleef_quad u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl + q.x * (1 - dh * th - dh * tl - dl * th - dl * tl); q.y = t * (n.y - q.x * d.y) + u; return q; } static INLINE CONST Sleef_quad2 dqmul_q2_q_q(Sleef_quad x, Sleef_quad y) { Sleef_quad xh = upperq(x), xl = x - xh; Sleef_quad yh = upperq(y), yl = y - yh; Sleef_quad2 r; r.x = x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl; return r; } static INLINE CONST Sleef_quad2 dqmul_q2_q2_q(Sleef_quad2 x, Sleef_quad y) { Sleef_quad xh = upperq(x.x), xl = x.x - xh; Sleef_quad yh = upperq(y ), yl = y - yh; Sleef_quad2 r; r.x = x.x * y; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y; return r; } static INLINE CONST Sleef_quad2 dqmul_q2_q2_q2(Sleef_quad2 x, Sleef_quad2 y) { Sleef_quad xh = upperq(x.x), xl = x.x - xh; Sleef_quad yh = upperq(y.x), yl = y.x - yh; Sleef_quad2 r; r.x = x.x * y.x; r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x; return r; } static INLINE CONST Sleef_quad2 dqsqu_q2_q2(Sleef_quad2 x) { Sleef_quad xh = upperq(x.x), xl = x.x - xh; Sleef_quad2 r; r.x = x.x * x.x; r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y); return r; } static INLINE CONST Sleef_quad2 dqrec_q2_q(Sleef_quad d) { Sleef_quad t = 1.0 / d; Sleef_quad dh = upperq(d), dl = d - dh; Sleef_quad th = upperq(t), tl = t - th; Sleef_quad2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); return q; } static INLINE CONST Sleef_quad2 dqrec_q2_q2(Sleef_quad2 d) { Sleef_quad t = 1.0 / d.x; Sleef_quad dh = upperq(d.x), dl = d.x - dh; Sleef_quad th = upperq(t ), tl = t - th; Sleef_quad2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); return q; } /* static INLINE CONST Sleef_quad2 dqsqrt_q2_q2(Sleef_quad2 d) { Sleef_quad t = sqrt(d.x + d.y); return dqscale_q2_q2_q(dqmul_q2_q2_q2(dqadd2_q2_q2_q2(d, dqmul_q2_q_q(t, t)), dqrec_q2_q(t)), 
0.5); } */ // EXPORT CONST Sleef_quad2 xsincospiq_u05(Sleef_quad d) { Sleef_quad u, s, t; Sleef_quad2 r, x, s2; u = d * 4; int64_t q = xceilq(u) & ~(int64_t)1; s = u - (Sleef_quad)q; t = s; s = s * s; s2 = dqmul_q2_q_q(t, t); // u = +0.1528321016188828732764080161368244291e-27Q; u = mlaq(u, s, -0.1494741498689376415859233754050616110e-24Q); u = mlaq(u, s, +0.1226149947504428931621181953791777769e-21Q); u = mlaq(u, s, -0.8348589834426964519785265770009675533e-19Q); u = mlaq(u, s, +0.4628704628834415551415078707261146069e-16Q); u = mlaq(u, s, -0.2041026339664143925641158896030605061e-13Q); u = mlaq(u, s, +0.6948453273886629408492386065037620114e-11Q); u = mlaq(u, s, -0.1757247673443401045145682042627557066e-8Q); u = mlaq(u, s, +0.3133616890378121520950407496603902388e-6Q); u = mlaq(u, s, -0.3657620418217725078660518698299784909e-4Q); u = mlaq(u, s, +0.2490394570192720160015798421577395304e-2Q); x = dqadd2_q2_q_q2(u * s, dq(-0.08074551218828078170696957048724322192457Q, 5.959584458773288360696286320980429277618e-36)); x = dqadd2_q2_q2_q2(dqmul_q2_q2_q2(s2, x), dq(0.7853981633974483096156608458198756993698Q, 2.167745574452451779709844565881105067311e-35Q)); x = dqmul_q2_q2_q(x, t); r.x = x.x + x.y; // u = -0.4616472554003168470361503708527464705e-29Q; u = mlaq(u, s, +0.4891528531228245577148587028696897180e-26Q); u = mlaq(u, s, -0.4377345071482935585011339656701961637e-23Q); u = mlaq(u, s, +0.3278483561449753435303463083506802784e-20Q); u = mlaq(u, s, -0.2019653396886554861865456720993185772e-17Q); u = mlaq(u, s, +0.1001886461636271957275884859852184250e-14Q); u = mlaq(u, s, -0.3898073171259675439843028673969857173e-12Q); u = mlaq(u, s, +0.1150115912797405152263176921581706121e-9Q); u = mlaq(u, s, -0.2461136950494199754009084018126527316e-7Q); u = mlaq(u, s, +0.3590860448591510079069203991167071234e-5Q); u = mlaq(u, s, -0.3259918869273900136414318317506198622e-3Q); x = dqadd2_q2_q_q2(u * s, dq(0.01585434424381550085228521039855226376329Q, 
6.529088663284413499535484912972485728198e-38Q)); x = dqadd2_q2_q2_q2(dqmul_q2_q2_q2(s2, x), dq(-0.308425137534042456838577843746129712906Q, -1.006808646313642786855469666154064243572e-35Q)); x = dqadd2_q2_q2_q(dqmul_q2_q2_q2(x, s2), 1); r.y = x.x + x.y; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (xisinfq(d)) { r.x = r.y = SLEEF_NANq; } if (!xisinfq(d) && xfabsq(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } return r; } EXPORT CONST Sleef_quad2 xsincospiq_u35(Sleef_quad d) { Sleef_quad u, s, t; Sleef_quad2 r; u = d * 4; int64_t q = xceilq(u) & ~(int64_t)1; s = u - (Sleef_quad)q; t = s; s = s * s; // u = -0.1485963032785725729464918728185622156e-24Q; u = mlaq(u, s, +0.1226127943866088943202201676879490635e-21Q); u = mlaq(u, s, -0.8348589518463078609690110857435995326e-19Q); u = mlaq(u, s, +0.4628704628547538824855302470312741438e-16Q); u = mlaq(u, s, -0.2041026339663972432248777826778586936e-13Q); u = mlaq(u, s, +0.6948453273886628726907826757576187848e-11Q); u = mlaq(u, s, -0.1757247673443401044967978719804318982e-8Q); u = mlaq(u, s, +0.3133616890378121520950114757196589206e-6Q); u = mlaq(u, s, -0.3657620418217725078660518414453815240e-4Q); u = mlaq(u, s, +0.2490394570192720160015798421435124000e-2Q); u = mlaq(u, s, -0.8074551218828078170696957048724041729e-1Q); u = mlaq(u, s, +0.7853981633974483096156608458198756994e+0Q); r.x = u * t; // u = +0.4862670988511544771355006256522366302e-26Q; u = mlaq(u, s, -0.4377265452147065611484052550741141029e-23Q); u = mlaq(u, s, +0.3278483433857326331665386021267750285e-20Q); u = mlaq(u, s, -0.2019653396755055912482006994709659430e-17Q); u = mlaq(u, s, +0.1001886461636180795663169552615123249e-14Q); u = mlaq(u, s, -0.3898073171259675007871885150022866077e-12Q); u = mlaq(u, s, +0.1150115912797405152123832255915284811e-9Q); u = mlaq(u, s, -0.2461136950494199754008784937314856168e-7Q); u = mlaq(u, s, +0.3590860448591510079069203583263258862e-5Q); u = 
mlaq(u, s, -0.3259918869273900136414318317180623832e-3Q); u = mlaq(u, s, +0.1585434424381550085228521039855096075e-1Q); u = mlaq(u, s, -0.3084251375340424568385778437461297129e+0Q); u = mlaq(u, s, 1.0Q); r.y = u; // if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (xisinfq(d)) { r.x = r.y = SLEEF_NANq; } if (!xisinfq(d) && xfabsq(d) > TRIGRANGEMAX3) { r.x = r.y = 0; } return r; } // #ifdef ENABLE_MAIN #include #include int main(int argc, char **argv) { Sleef_quad a = -8.3998726984803832684266802333309369056312711821029e-09Q; Sleef_quad2 q = xsincospiq_u05(a); printf(" "); printf128(q.x); printf("\n"); /* printf128(0.1Q); printf("\n"); Sleef_quad2 q2 = dqmul_q2_q_q(0.1Q, 0.1Q); printf128(q2.x); printf("\n"); printf128(q2.y); printf("\n"); */ /* printf("%s\n", toBCq(0.1Q)); printf("%s\n", toBCq(upperq(0.1Q))); printf("%s\n", toBCq(0.1Q-upperq(0.1Q))); Sleef_quad2 q2 = dqmul_q2_q_q(0.1Q, 0.1Q); printf("%s + ", toBCq(q2.x)); printf("%s\n", toBCq(q2.y)); */ } #endif sleef-3.3.1/src/libm/sleefsimddp.c000066400000000000000000004321431333715643700170140ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. 
#include #include #include #include #include "misc.h" extern const double rempitabdp[]; #define __SLEEFSIMDDP_C__ #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesse2_gnuabi.h" #else #include "renamesse2.h" #endif #endif #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #include "helpersse2.h" #ifdef DORENAME #include "renamesse4.h" #endif #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx_gnuabi.h" #else #include "renameavx.h" #endif #endif #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamefma4_gnuabi.h" #else #include "renamefma4.h" #endif #endif #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx2_gnuabi.h" #else #include "renameavx2.h" #endif #endif #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #ifdef DORENAME #include "renameavx2128.h" #endif #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx512f_gnuabi.h" #else #include "renameavx512f.h" #endif #endif #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameadvsimd_gnuabi.h" #else #include "renameadvsimd.h" #endif #endif #endif #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #ifdef DORENAME #include "renamevsx.h" #endif #endif // #ifdef ENABLE_VECEXT #define CONFIG 1 #include "helpervecext.h" #ifdef DORENAME #include "renamevecext.h" #endif #endif #ifdef ENABLE_PUREC #define CONFIG 1 #include "helperpurec.h" #ifdef DORENAME #include "renamepurec.h" #endif #endif #ifdef ENABLE_SVE #define CONFIG 1 #include "helpersve.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesve_gnuabi.h" #else #include 
"renamesve.h" #endif /* ENABLE_GNUABI */ #endif /* DORENAME */ #endif /* ENABLE_SVE */ // #include "dd.h" // static INLINE vopmask vnot_vo64_vo64(vopmask x) { return vxor_vo_vo_vo(x, veq64_vo_vm_vm(vcast_vm_i_i(0, 0), vcast_vm_i_i(0, 0))); } static INLINE CONST vopmask vsignbit_vo_vd(vdouble d) { return veq64_vo_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))); } // return d0 < d1 ? x : y static INLINE CONST vint vsel_vi_vd_vd_vi_vi(vdouble d0, vdouble d1, vint x, vint y) { return vsel_vi_vo_vi_vi(vcast_vo32_vo64(vlt_vo_vd_vd(d0, d1)), x, y); } // return d0 < 0 ? x : 0 static INLINE CONST vint vsel_vi_vd_vi(vdouble d, vint x) { return vand_vi_vo_vi(vcast_vo32_vo64(vsignbit_vo_vd(d)), x); } static INLINE CONST vopmask visnegzero_vo_vd(vdouble d) { return veq64_vo_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))); } static INLINE CONST vopmask visnumber_vo_vd(vdouble x) { return vandnot_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, x)); } static INLINE CONST vmask vsignbit_vm_vd(vdouble d) { return vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))); } static INLINE CONST vdouble vmulsign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(x), vsignbit_vm_vd(y))); } static INLINE CONST vdouble vcopysign_vd_vd_vd(vdouble x, vdouble y) { return vreinterpret_vd_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(x)), vand_vm_vm_vm (vreinterpret_vm_vd(vcast_vd_d(-0.0)), vreinterpret_vm_vd(y)))); } static INLINE CONST vdouble vsign_vd_vd(vdouble d) { return vmulsign_vd_vd_vd(vcast_vd_d(1.0), d); } static INLINE CONST vdouble vpow2i_vd_vi(vint q) { q = vadd_vi_vi_vi(vcast_vi_i(0x3ff), q); vint2 r = vcastu_vi2_vi(q); return vreinterpret_vd_vi2(vsll_vi2_vi2_i(r, 20)); } static INLINE CONST vdouble vldexp_vd_vd_vi(vdouble x, vint q) { vint m = vsra_vi_vi_i(q, 31); m = 
vsll_vi_vi_i(vsub_vi_vi_vi(vsra_vi_vi_i(vadd_vi_vi_vi(m, q), 9), m), 7); q = vsub_vi_vi_vi(q, vsll_vi_vi_i(m, 2)); m = vadd_vi_vi_vi(vcast_vi_i(0x3ff), m); m = vandnot_vi_vo_vi(vgt_vo_vi_vi(vcast_vi_i(0), m), m); m = vsel_vi_vo_vi_vi(vgt_vo_vi_vi(m, vcast_vi_i(0x7ff)), vcast_vi_i(0x7ff), m); vint2 r = vcastu_vi2_vi(m); vdouble y = vreinterpret_vd_vi2(vsll_vi2_vi2_i(r, 20)); return vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(x, y), y), y), y), vpow2i_vd_vi(q)); } static INLINE CONST vdouble vldexp2_vd_vd_vi(vdouble d, vint e) { return vmul_vd_vd_vd(vmul_vd_vd_vd(d, vpow2i_vd_vi(vsra_vi_vi_i(e, 1))), vpow2i_vd_vi(vsub_vi_vi_vi(e, vsra_vi_vi_i(e, 1)))); } static INLINE CONST vdouble vldexp3_vd_vd_vi(vdouble d, vint q) { return vreinterpret_vd_vi2(vadd_vi2_vi2_vi2(vreinterpret_vi2_vd(d), vsll_vi2_vi2_i(vcastu_vi2_vi(q), 20))); } #ifndef ENABLE_AVX512F static INLINE CONST vint vilogbk_vi_vd(vdouble d) { vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(4.9090934652977266E-91)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(vcast_vd_d(2.037035976334486E90), d), d); vint q = vcastu_vi_vi2(vreinterpret_vi2_vd(d)); q = vand_vi_vi_vi(q, vcast_vi_i(((1 << 12)-1) << 20)); q = vsrl_vi_vi_i(q, 20); q = vsub_vi_vi_vi(q, vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vcast_vi_i(300 + 0x3ff), vcast_vi_i(0x3ff))); return q; } static INLINE CONST vint vilogb2k_vi_vd(vdouble d) { vint q = vcastu_vi_vi2(vreinterpret_vi2_vd(d)); q = vsrl_vi_vi_i(q, 20); q = vand_vi_vi_vi(q, vcast_vi_i(0x7ff)); q = vsub_vi_vi_vi(q, vcast_vi_i(0x3ff)); return q; } #endif static INLINE CONST vopmask visint_vo_vd(vdouble d) { vdouble x = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0 / (1LL << 31)))); x = vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(1LL << 31)), x, d); return vor_vo_vo_vo(veq_vo_vd_vd(vtruncate_vd_vd(x), x), vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1LL << 53))); } static INLINE CONST vopmask visodd_vo_vd(vdouble d) { vdouble x = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0 / (1LL << 31)))); x = 
vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(1LL << 31)), x, d); return vand_vo_vo_vo(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vtruncate_vi_vd(x), vcast_vi_i(1)), vcast_vi_i(1))), vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1LL << 53))); } // EXPORT CONST vdouble xldexp(vdouble x, vint q) { return vldexp_vd_vd_vi(x, q); } EXPORT CONST vint xilogb(vdouble d) { vdouble e = vcast_vd_vi(vilogbk_vi_vd(vabs_vd_vd(d))); e = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_FP_ILOGB0), e); e = vsel_vd_vo_vd_vd(visnan_vo_vd(d), vcast_vd_d(SLEEF_FP_ILOGBNAN), e); e = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(INT_MAX), e); return vrint_vi_vd(e); } #ifdef ENABLE_SVE typedef __sizeless_struct { vdouble d; vint i; } di_t; typedef __sizeless_struct { vdouble2 dd; vint i; } ddi_t; #else typedef struct { vdouble d; vint i; } di_t; typedef struct { vdouble2 dd; vint i; } ddi_t; #endif static INLINE CONST di_t rempisub(vdouble x) { #ifdef FULL_FP_ROUNDING vdouble y = vrint_vd_vd(vmul_vd_vd_vd(x, vcast_vd_d(4))); vint vi = vtruncate_vi_vd(vsub_vd_vd_vd(y, vmul_vd_vd_vd(vrint_vd_vd(x), vcast_vd_d(4)))); di_t ret = { vsub_vd_vd_vd(x, vmul_vd_vd_vd(y, vcast_vd_d(0.25))), vi }; #else vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(1LL << 28), vtruncate_vd_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (1LL << 28)))))); vint vi = vadd_vi_vi_vi(vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(x, vcast_vd_d(0))), vcast_vi_i(4), vcast_vi_i(3)), vtruncate_vi_vd(vmul_vd_vd_vd(fr, vcast_vd_d(8)))); vi = vsra_vi_vi_i(vsub_vi_vi_vi(vand_vi_vi_vi(vcast_vi_i(7), vi), vcast_vi_i(3)), 1); fr = vsub_vd_vd_vd(fr, vmul_vd_vd_vd(vcast_vd_d(0.25), vtruncate_vd_vd(vmla_vd_vd_vd_vd(fr, vcast_vd_d(4), vmulsign_vd_vd_vd(vcast_vd_d(0.5), x))))); fr = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(fr), vcast_vd_d(0.25)), vsub_vd_vd_vd(fr, vmulsign_vd_vd_vd(vcast_vd_d(0.5), x)), fr); fr = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(fr), vcast_vd_d(1e+10)), vcast_vd_d(0), fr); vopmask o = veq_vo_vd_vd(vabs_vd_vd(x), 
vcast_vd_d(0.12499999999999998612)); fr = vsel_vd_vo_vd_vd(o, x, fr); vi = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vcast_vi_i(0), vi); di_t ret = { fr, vi }; #endif return ret; } static INLINE CONST ddi_t rempi(vdouble a) { vdouble2 x, y, z; vint ex = vilogb2k_vi_vd(a); #if defined(ENABLE_AVX512F) ex = vandnot_vi_vi_vi(vsra_vi_vi_i(ex, 31), ex); ex = vand_vi_vi_vi(ex, vcast_vi_i(1023)); #endif ex = vsub_vi_vi_vi(ex, vcast_vi_i(55)); vint q = vand_vi_vo_vi(vgt_vo_vi_vi(ex, vcast_vi_i(700-55)), vcast_vi_i(-64)); a = vldexp3_vd_vd_vi(a, q); ex = vandnot_vi_vi_vi(vsra_vi_vi_i(ex, 31), ex); ex = vsll_vi_vi_i(ex, 2); x = ddmul_vd2_vd_vd(a, vgather_vd_p_vi(rempitabdp, ex)); di_t di = rempisub(x.x); q = di.i; x.x = di.d; x = ddnormalize_vd2_vd2(x); y = ddmul_vd2_vd_vd(a, vgather_vd_p_vi(rempitabdp+1, ex)); x = ddadd2_vd2_vd2_vd2(x, y); di = rempisub(x.x); q = vadd_vi_vi_vi(q, di.i); x.x = di.d; x = ddnormalize_vd2_vd2(x); y = vcast_vd2_vd_vd(vgather_vd_p_vi(rempitabdp+2, ex), vgather_vd_p_vi(rempitabdp+3, ex)); y = ddmul_vd2_vd2_vd(y, a); x = ddadd2_vd2_vd2_vd2(x, y); x = ddnormalize_vd2_vd2(x); x = ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(3.141592653589793116*2, 1.2246467991473532072e-16*2)); vopmask o = vlt_vo_vd_vd(vabs_vd_vd(a), vcast_vd_d(0.7)); x.x = vsel_vd_vo_vd_vd(o, a, x.x); x.y = vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(x.y))); ddi_t ret = { x, q }; return ret; } EXPORT CONST vdouble xsin(vdouble d) { vdouble u, s, r = d; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); ql = vrint_vi_vd(dql); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2), d); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); vdouble dql = 
vrint_vd_vd(vmlapn_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), dqh)); ql = vrint_vi_vd(dql); d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A), d); d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B), d); d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C), d); d = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D), d); } else { ddi_t ddi = rempi(d); ql = vand_vi_vi_vi(ddi.i, vcast_vi_i(3)); ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(ddi.dd.x, vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1))); ql = vsra_vi_vi_i(ql, 2); vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddi.i, vcast_vi_i(1)), vcast_vi_i(1)); vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), ddi.dd.x), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), ddi.dd.x)); x = ddadd2_vd2_vd2_vd2(ddi.dd, x); ddi.dd = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddi.dd); d = vadd_vd_vd_vd(ddi.dd.x, ddi.dd.y); d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d))); } s = vmul_vd_vd_vd(d, d); d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d))); u = vcast_vd_d(-7.97255955009037868891952e-18); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.81009972710863200091251e-15)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-7.64712219118158833288484e-13)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1.60590430605664501629054e-10)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50521083763502045810755e-08)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573192239198747630416e-06)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698412696162806809)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00833333333333332974823815)); u = vmla_vd_vd_vd_vd(u, s, 
vcast_vd_d(-0.166666666666666657414808)); u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d); u = vsel_vd_vo_vd_vd(visnegzero_vo_vd(r), r, u); return u; } EXPORT CONST vdouble xsin_u1(vdouble d) { vdouble u; vdouble2 s, t, x; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI))); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2), d); s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2))); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); const vdouble dql = vrint_vd_vd(vmlapn_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), dqh)); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A), d); s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D))); } else { ddi_t ddi = rempi(d); ql = vand_vi_vi_vi(ddi.i, vcast_vi_i(3)); ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(ddi.dd.x, vcast_vd_d(0))), vcast_vi_i(2), vcast_vi_i(1))); ql = vsra_vi_vi_i(ql, 2); vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddi.i, vcast_vi_i(1)), vcast_vi_i(1)); vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), ddi.dd.x), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), ddi.dd.x)); x = ddadd2_vd2_vd2_vd2(ddi.dd, x); ddi.dd = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddi.dd); s = ddnormalize_vd2_vd2(ddi.dd); s.x = 
vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(s.x))); } t = s; s = ddsqu_vd2_vd2(s); u = vcast_vd_d(2.72052416138529567917983e-15); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-7.6429259411395447190023e-13)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(1.60589370117277896211623e-10)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.5052106814843123359368e-08)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75573192104428224777379e-06)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.000198412698412046454654947)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00833333333333318056201922)); x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, s.x)), s)); u = ddmul_vd_vd2_vd2(t, x); u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u))); u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); return u; } EXPORT CONST vdouble xcos(vdouble d) { vdouble u, s, r = d; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))), vcast_vd_d(1)); ql = vrint_vi_vd(dql); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), d); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24)))); ql = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)), vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vcast_vi_i(1)); vdouble dql = vcast_vd_vi(ql); d = 
vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), d); d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), d); d = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), d); d = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), d); d = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), d); } else { ddi_t ddi = rempi(d); ql = vand_vi_vi_vi(ddi.i, vcast_vi_i(3)); ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(ddi.dd.x, vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7))); ql = vsra_vi_vi_i(ql, 1); vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddi.i, vcast_vi_i(1)), vcast_vi_i(0)); vdouble y = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(ddi.dd.x, vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1)); vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y)); x = ddadd2_vd2_vd2_vd2(ddi.dd, x); ddi.dd = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddi.dd); d = vadd_vd_vd_vd(ddi.dd.x, ddi.dd.y); d = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(r), visnan_vo_vd(r)), vreinterpret_vm_vd(d))); } s = vmul_vd_vd_vd(d, d); d = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(d))); u = vcast_vd_d(-7.97255955009037868891952e-18); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.81009972710863200091251e-15)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-7.64712219118158833288484e-13)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1.60590430605664501629054e-10)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50521083763502045810755e-08)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573192239198747630416e-06)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698412696162806809)); u = vmla_vd_vd_vd_vd(u, s, 
vcast_vd_d(0.00833333333333332974823815)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666657414808)); u = vadd_vd_vd_vd(vmul_vd_vd_vd(s, vmul_vd_vd_vd(u, d)), d); return u; } EXPORT CONST vdouble xcos_u1(vdouble d) { vdouble u; vdouble2 s, t, x; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vrint_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI), vcast_vd_d(-0.5))); dql = vmla_vd_vd_vd_vd(vcast_vd_d(2), dql, vcast_vd_d(1)); ql = vrint_vi_vd(dql); s = ddadd2_vd2_vd_vd(d, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmla_vd_vd_vd_vd(d, vcast_vd_d(M_1_PI / (1 << 23)), vcast_vd_d(-M_1_PI / (1 << 24)))); ql = vrint_vi_vd(vadd_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(M_1_PI)), vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-(1 << 23)), vcast_vd_d(-0.5)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vcast_vi_i(1)); const vdouble dql = vcast_vd_vi(ql); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); s = ddadd2_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); } else { ddi_t ddi = rempi(d); ql = vand_vi_vi_vi(ddi.i, vcast_vi_i(3)); ql = vadd_vi_vi_vi(vadd_vi_vi_vi(ql, ql), vsel_vi_vo_vi_vi(vcast_vo32_vo64(vgt_vo_vd_vd(ddi.dd.x, vcast_vd_d(0))), vcast_vi_i(8), vcast_vi_i(7))); ql = vsra_vi_vi_i(ql, 1); vopmask o = veq_vo_vi_vi(vand_vi_vi_vi(ddi.i, vcast_vi_i(1)), vcast_vi_i(0)); vdouble y = 
vsel_vd_vo_vd_vd(vgt_vo_vd_vd(ddi.dd.x, vcast_vd_d(0)), vcast_vd_d(0), vcast_vd_d(-1)); vdouble2 x = vcast_vd2_vd_vd(vmulsign_vd_vd_vd(vcast_vd_d(-3.141592653589793116 * 0.5), y), vmulsign_vd_vd_vd(vcast_vd_d(-1.2246467991473532072e-16 * 0.5), y)); x = ddadd2_vd2_vd2_vd2(ddi.dd, x); ddi.dd = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(o), x, ddi.dd); s = ddnormalize_vd2_vd2(ddi.dd); s.x = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(s.x))); } t = s; s = ddsqu_vd2_vd2(s); u = vcast_vd_d(2.72052416138529567917983e-15); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-7.6429259411395447190023e-13)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(1.60589370117277896211623e-10)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.5052106814843123359368e-08)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75573192104428224777379e-06)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.000198412698412046454654947)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00833333333333318056201922)); x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(-0.166666666666666657414808), vmul_vd_vd_vd(u, s.x)), s)); u = ddmul_vd_vd2_vd2(t, x); u = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(0))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(u))); return u; } #ifdef ENABLE_GNUABI #define TYPE2_FUNCATR static INLINE CONST #define TYPE6_FUNCATR static INLINE CONST #define SQRTU05_FUNCATR static INLINE CONST #define XSINCOS sincosk #define XSINCOS_U1 sincosk_u1 #define XSINCOSPI_U05 sincospik_u05 #define XSINCOSPI_U35 sincospik_u35 #define XMODF modfk #else #define TYPE2_FUNCATR EXPORT #define TYPE6_FUNCATR EXPORT CONST #define SQRTU05_FUNCATR EXPORT CONST #define XSINCOS xsincos #define XSINCOS_U1 xsincos_u1 #define XSINCOSPI_U05 xsincospi_u05 #define XSINCOSPI_U35 xsincospi_u35 #define XMODF xmodf #endif TYPE2_FUNCATR vdouble2 XSINCOS(vdouble d) { vopmask o; 
vdouble u, t, rx, ry, s; vdouble2 r; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); ql = vrint_vi_vd(dql); s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), s); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); ql = vrint_vi_vd(dql); s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), s); s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), s); s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), s); s = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), s); s = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), s); s = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), s); } else { ddi_t ddi = rempi(d); ql = ddi.i; s = vadd_vd_vd_vd(ddi.dd.x, ddi.dd.y); s = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(s))); } t = s; s = vmul_vd_vd_vd(s, s); u = vcast_vd_d(1.58938307283228937328511e-10); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.50506943502539773349318e-08)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573131776846360512547e-06)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000198412698278911770864914)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0083333333333191845961746)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.166666666666666130709393)); rx = vmla_vd_vd_vd_vd(vmul_vd_vd_vd(u, s), t, t); rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); u = vcast_vd_d(-1.13615350239097429531523e-11); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.08757471207040055479366e-09)); u = vmla_vd_vd_vd_vd(u, s, 
vcast_vd_d(-2.75573144028847567498567e-07)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.48015872890001867311915e-05)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.00138888888888714019282329)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665519592062)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.5)); ry = vmla_vd_vd_vd_vd(s, u, vcast_vd_d(1)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0))); r.x = vsel_vd_vo_vd_vd(o, rx, ry); r.y = vsel_vd_vo_vd_vd(o, ry, rx); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2))); r.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.x))); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2))); r.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.y))); return r; } TYPE2_FUNCATR vdouble2 XSINCOS_U1(vdouble d) { vopmask o; vdouble u, rx, ry; vdouble2 r, s, t, x; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { const vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d); s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); const vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); s = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5))); s = 
ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); } else { ddi_t ddi = rempi(d); ql = ddi.i; s = ddi.dd; o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); s.x = vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(s.x))); s.y = vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(s.y))); } t = s; s.x = ddsqu_vd_vd2(s); u = vcast_vd_d(1.58938307283228937328511e-10); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.50506943502539773349318e-08)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75573131776846360512547e-06)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.000198412698278911770864914)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0083333333333191845961746)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.166666666666666130709393)); u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(s.x, t.x)); x = ddadd_vd2_vd2_vd(t, u); rx = vadd_vd_vd_vd(x.x, x.y); rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); u = vcast_vd_d(-1.13615350239097429531523e-11); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.08757471207040055479366e-09)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.75573144028847567498567e-07)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.48015872890001867311915e-05)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.00138888888888714019282329)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0416666666666665519592062)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-0.5)); x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(s.x, u)); ry = vadd_vd_vd_vd(x.x, x.y); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(0))); r.x = vsel_vd_vo_vd_vd(o, rx, ry); r.y = vsel_vd_vo_vd_vd(o, ry, rx); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(2)), vcast_vi_i(2))); r.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.x))); o = 
vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(2)), vcast_vi_i(2))); r.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.y))); return r; } TYPE2_FUNCATR vdouble2 XSINCOSPI_U05(vdouble d) { vopmask o; vdouble u, s, t, rx, ry; vdouble2 r, x, s2; u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); vint q = vtruncate_vi_vd(u); q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); t = s; s = vmul_vd_vd_vd(s, s); s2 = ddmul_vd2_vd_vd(t, t); // u = vcast_vd_d(-2.02461120785182399295868e-14); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(6.94821830580179461327784e-12)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-1.75724749952853179952664e-09)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(3.13361688966868392878422e-07)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-3.6576204182161551920361e-05)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00249039457019271850274356)); x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vcast_vd2_d_d(-0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vcast_vd2_d_d(0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_vd2_vd2_vd(x, t); rx = vadd_vd_vd_vd(x.x, x.y); rx = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), rx); // u = vcast_vd_d(9.94480387626843774090208e-16); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-3.89796226062932799164047e-13)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1.15011582539996035266901e-10)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-2.4611369501044697495359e-08)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(3.59086044859052754005062e-06)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000325991886927389905997954)); x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vcast_vd2_d_d(0.0158543442438155018914259, -1.04693272280631521908845e-18)); x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), 
vcast_vd2_d_d(-0.308425137534042437259529, -1.95698492133633550338345e-17)); x = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd2(x, s2), vcast_vd_d(1)); ry = vadd_vd_vd_vd(x.x, x.y); // o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); r.x = vsel_vd_vo_vd_vd(o, rx, ry); r.y = vsel_vd_vo_vd_vd(o, ry, rx); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); r.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.x))); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); r.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.y))); o = vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)); r.x = vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(r.x))); r.y = vsel_vd_vo_vd_vd(o, vcast_vd_d(1), r.y); o = visinf_vo_vd(d); r.x = vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(r.x))); r.y = vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(r.y))); return r; } TYPE2_FUNCATR vdouble2 XSINCOSPI_U35(vdouble d) { vopmask o; vdouble u, s, t, rx, ry; vdouble2 r; u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); vint q = vtruncate_vi_vd(u); q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); t = s; s = vmul_vd_vd_vd(s, s); // u = vcast_vd_d(+0.6880638894766060136e-11); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.1757159564542310199e-8)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3133616327257867311e-6)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3657620416388486452e-4)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2490394570189932103e-2)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.8074551218828056320e-1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.7853981633974482790e+0)); rx = vmul_vd_vd_vd(u, t); // u = vcast_vd_d(-0.3860141213683794352e-12); u = 
vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1150057888029681415e-9)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.2461136493006663553e-7)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.3590860446623516713e-5)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3259918869269435942e-3)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1585434424381541169e-1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.3084251375340424373e+0)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(1)); ry = u; // o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); r.x = vsel_vd_vo_vd_vd(o, rx, ry); r.y = vsel_vd_vo_vd_vd(o, ry, rx); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); r.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.x))); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); r.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(r.y))); o = vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)); r.x = vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(r.x))); r.y = vreinterpret_vd_vm(vandnot_vm_vo64_vm(o, vreinterpret_vm_vd(r.y))); o = visinf_vo_vd(d); r.x = vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(r.x))); r.y = vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(r.y))); return r; } TYPE6_FUNCATR vdouble2 XMODF(vdouble x) { vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(1LL << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (1LL << 31))))))); fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); fr = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(1LL << 52)), vcast_vd_d(0), fr); vdouble2 ret; ret.x = vcopysign_vd_vd_vd(fr, x); ret.y = vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x); return ret; } #ifdef ENABLE_GNUABI EXPORT void xsincos(vdouble a, double *ps, double *pc) { vdouble2 r = sincosk(a); 
vstoreu_v_p_vd(ps, r.x); vstoreu_v_p_vd(pc, r.y); } EXPORT void xsincos_u1(vdouble a, double *ps, double *pc) { vdouble2 r = sincosk_u1(a); vstoreu_v_p_vd(ps, r.x); vstoreu_v_p_vd(pc, r.y); } EXPORT void xsincospi_u05(vdouble a, double *ps, double *pc) { vdouble2 r = sincospik_u05(a); vstoreu_v_p_vd(ps, r.x); vstoreu_v_p_vd(pc, r.y); } EXPORT void xsincospi_u35(vdouble a, double *ps, double *pc) { vdouble2 r = sincospik_u35(a); vstoreu_v_p_vd(ps, r.x); vstoreu_v_p_vd(pc, r.y); } EXPORT CONST vdouble xmodf(vdouble a, double *iptr) { vdouble2 r = modfk(a); vstoreu_v_p_vd(iptr, r.y); return r.x; } #endif // #ifdef ENABLE_GNUABI static INLINE CONST vdouble2 sinpik(vdouble d) { vopmask o; vdouble u, s, t; vdouble2 x, s2; u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); vint q = vtruncate_vi_vd(u); q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2))); s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); t = s; s = vmul_vd_vd_vd(s, s); s2 = ddmul_vd2_vd_vd(t, t); // u = vsel_vd_vo_d_d(o, 9.94480387626843774090208e-16, -2.02461120785182399295868e-14); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -3.89796226062932799164047e-13, 6.948218305801794613277840e-12)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 1.150115825399960352669010e-10, -1.75724749952853179952664e-09)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -2.46113695010446974953590e-08, 3.133616889668683928784220e-07)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 3.590860448590527540050620e-06, -3.65762041821615519203610e-05)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -0.000325991886927389905997954, 0.0024903945701927185027435600)); x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vsel_vd2_vo_d_d_d_d(o, 0.0158543442438155018914259, -1.04693272280631521908845e-18, -0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vsel_vd2_vo_d_d_d_d(o, 
-0.308425137534042437259529, -1.95698492133633550338345e-17, 0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_vd2_vd2_vd2(x, vsel_vd2_vo_vd2_vd2(o, s2, vcast_vd2_vd_vd(t, vcast_vd_d(0)))); x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(4)), vcast_vi_i(4))); x.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(x.x))); x.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(x.y))); return x; } EXPORT CONST vdouble xsinpi_u05(vdouble d) { vdouble2 x = sinpik(d); vdouble r = vadd_vd_vd_vd(x.x, x.y); r = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), r); r = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)), vreinterpret_vm_vd(r))); r = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(r))); return r; } static INLINE CONST vdouble2 cospik(vdouble d) { vopmask o; vdouble u, s, t; vdouble2 x, s2; u = vmul_vd_vd_vd(d, vcast_vd_d(4.0)); vint q = vtruncate_vi_vd(u); q = vand_vi_vi_vi(vadd_vi_vi_vi(q, vxor_vi_vi_vi(vsrl_vi_vi_i(q, 31), vcast_vi_i(1))), vcast_vi_i(~1)); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(0))); s = vsub_vd_vd_vd(u, vcast_vd_vi(q)); t = s; s = vmul_vd_vd_vd(s, s); s2 = ddmul_vd2_vd_vd(t, t); // u = vsel_vd_vo_d_d(o, 9.94480387626843774090208e-16, -2.02461120785182399295868e-14); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -3.89796226062932799164047e-13, 6.948218305801794613277840e-12)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 1.150115825399960352669010e-10, -1.75724749952853179952664e-09)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -2.46113695010446974953590e-08, 3.133616889668683928784220e-07)); u = vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, 3.590860448590527540050620e-06, -3.65762041821615519203610e-05)); u = 
vmla_vd_vd_vd_vd(u, s, vsel_vd_vo_d_d(o, -0.000325991886927389905997954, 0.0024903945701927185027435600)); x = ddadd2_vd2_vd_vd2(vmul_vd_vd_vd(u, s), vsel_vd2_vo_d_d_d_d(o, 0.0158543442438155018914259, -1.04693272280631521908845e-18, -0.0807455121882807852484731, 3.61852475067037104849987e-18)); x = ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(s2, x), vsel_vd2_vo_d_d_d_d(o, -0.308425137534042437259529, -1.95698492133633550338345e-17, 0.785398163397448278999491, 3.06287113727155002607105e-17)); x = ddmul_vd2_vd2_vd2(x, vsel_vd2_vo_vd2_vd2(o, s2, vcast_vd2_vd_vd(t, vcast_vd_d(0)))); x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vadd_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(4)), vcast_vi_i(4))); x.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(x.x))); x.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(x.y))); return x; } EXPORT CONST vdouble xcospi_u05(vdouble d) { vdouble2 x = cospik(d); vdouble r = vadd_vd_vd_vd(x.x, x.y); r = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX3/4)), vcast_vd_d(1), r); r = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(r))); return r; } EXPORT CONST vdouble xtan(vdouble d) { vdouble u, s, x; vopmask o; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); ql = vrint_vi_vd(dql); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2 * 0.5), d); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B2 * 0.5), x); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1e+7))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); vdouble dql = vrint_vd_vd(vsub_vd_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI)), dqh)); ql = 
vrint_vi_vd(dql); x = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A * 0.5), x); x = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_B * 0.5), x); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_B * 0.5), x); x = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_C * 0.5), x); x = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_C * 0.5), x); x = vmla_vd_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D * 0.5), x); } else { ddi_t ddi = rempi(d); ql = ddi.i; x = vadd_vd_vd_vd(ddi.dd.x, ddi.dd.y); x = vreinterpret_vd_vm(vor_vm_vo64_vm(visinf_vo_vd(d), vreinterpret_vm_vd(x))); x = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)), vreinterpret_vm_vd(x))); } s = vmul_vd_vd_vd(x, x); o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); x = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(x))); #ifdef SPLIT_KERNEL vdouble s2 = vmul_vd_vd_vd(s, s), v; u = vcast_vd_d(-4.31184585467324750724175e-05); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(-0.000137892809714281708733524)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(-6.07500301486087879295969e-05)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(0.000219040550724571513561967)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(0.00145461240472358871965441)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(0.00886321546662684547901456)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(0.0539682539049961967903002)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(0.333333333333320047664472)); v = vcast_vd_d(9.99583485362149960784268e-06); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(0.000103573238391744000389851)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(0.000157624358465342784274554)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(0.000148898734751616411290179)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(0.000595799595197098359744547)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(0.0035923150771440177410343)); v = vmla_vd_vd_vd_vd(v, s2, 
vcast_vd_d(0.0218694899718446938985394)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(0.133333333334818976423364)); u = vmla_vd_vd_vd_vd(v, s, u); #else u = vcast_vd_d(9.99583485362149960784268e-06); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-4.31184585467324750724175e-05)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000103573238391744000389851)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-0.000137892809714281708733524)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000157624358465342784274554)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(-6.07500301486087879295969e-05)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000148898734751616411290179)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000219040550724571513561967)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000595799595197098359744547)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00145461240472358871965441)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0035923150771440177410343)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00886321546662684547901456)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0218694899718446938985394)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0539682539049961967903002)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.133333333334818976423364)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.333333333333320047664472)); #endif u = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(u, x), x); u = vsel_vd_vo_vd_vd(o, vrec_vd_vd(u), u); u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); return u; } EXPORT CONST vdouble xtan_u1(vdouble d) { vdouble u; vdouble2 s, t, x; vopmask o; vint ql; if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX2))))) { vdouble dql = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2 * M_1_PI))); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dql, vcast_vd_d(-PI_A2*0.5), d); s = ddadd_vd2_vd_vd (u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B2*0.5))); } else if (LIKELY(vtestallones_i_vo64(vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(TRIGRANGEMAX))))) { vdouble dqh = vtruncate_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(2*M_1_PI / (1 << 24)))); 
dqh = vmul_vd_vd_vd(dqh, vcast_vd_d(1 << 24)); s = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(vcast_vd2_d_d(M_2_PI_H, M_2_PI_L), d), vsub_vd_vd_vd(vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-0.5), vcast_vd_d(0.5)), dqh)); const vdouble dql = vtruncate_vd_vd(vadd_vd_vd_vd(s.x, s.y)); ql = vrint_vi_vd(dql); u = vmla_vd_vd_vd_vd(dqh, vcast_vd_d(-PI_A * 0.5), d); s = ddadd_vd2_vd_vd(u, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_A*0.5 ))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_B*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_B*0.5 ))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dqh, vcast_vd_d(-PI_C*0.5))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dql, vcast_vd_d(-PI_C*0.5 ))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vadd_vd_vd_vd(dqh, dql), vcast_vd_d(-PI_D*0.5))); } else { ddi_t ddi = rempi(d); ql = ddi.i; s = ddi.dd; o = vor_vo_vo_vo(visinf_vo_vd(d), visnan_vo_vd(d)); s.x = vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(s.x))); s.y = vreinterpret_vd_vm(vor_vm_vo64_vm(o, vreinterpret_vm_vd(s.y))); } o = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(ql, vcast_vi_i(1)), vcast_vi_i(1))); vmask n = vand_vm_vo64_vm(o, vreinterpret_vm_vd(vcast_vd_d(-0.0))); s.x = vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(s.x), n)); s.y = vreinterpret_vd_vm(vxor_vm_vm_vm(vreinterpret_vm_vd(s.y), n)); t = s; s = ddsqu_vd2_vd2(s); #ifdef SPLIT_KERNEL vdouble sx2 = vmul_vd_vd_vd(s.x, s.x), v; u = vcast_vd_d(-2.59519791585924697698614e-05); u = vmla_vd_vd_vd_vd(u, sx2, vcast_vd_d(-3.05033014433946488225616e-05)); u = vmla_vd_vd_vd_vd(u, sx2, vcast_vd_d(8.09674518280159187045078e-05)); u = vmla_vd_vd_vd_vd(u, sx2, vcast_vd_d(0.000588505168743587154904506)); u = vmla_vd_vd_vd_vd(u, sx2, vcast_vd_d(0.00359208743836906619142924)); u = vmla_vd_vd_vd_vd(u, sx2, vcast_vd_d(0.0218694882853846389592078)); u = vmla_vd_vd_vd_vd(u, sx2, vcast_vd_d(0.133333333333125941821962)); v = vcast_vd_d(1.01419718511083373224408e-05); v = vmla_vd_vd_vd_vd(v, sx2, 
vcast_vd_d(5.23388081915899855325186e-05)); v = vmla_vd_vd_vd_vd(v, sx2, vcast_vd_d(7.14707504084242744267497e-05)); v = vmla_vd_vd_vd_vd(v, sx2, vcast_vd_d(0.000244884931879331847054404)); v = vmla_vd_vd_vd_vd(v, sx2, vcast_vd_d(0.00145612788922812427978848)); v = vmla_vd_vd_vd_vd(v, sx2, vcast_vd_d(0.00886323944362401618113356)); v = vmla_vd_vd_vd_vd(v, sx2, vcast_vd_d(0.0539682539781298417636002)); u = vmla_vd_vd_vd_vd(v, s.x, u); #else u = vcast_vd_d(1.01419718511083373224408e-05); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-2.59519791585924697698614e-05)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(5.23388081915899855325186e-05)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(-3.05033014433946488225616e-05)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(7.14707504084242744267497e-05)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(8.09674518280159187045078e-05)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.000244884931879331847054404)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.000588505168743587154904506)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00145612788922812427978848)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00359208743836906619142924)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00886323944362401618113356)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0218694882853846389592078)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0539682539781298417636002)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.133333333333125941821962)); #endif x = ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd2_vd2(ddadd_vd2_vd_vd(vcast_vd_d(0.333333333333334980164153), vmul_vd_vd_vd(u, s.x)), s)); x = ddmul_vd2_vd2_vd2(t, x); x = vsel_vd2_vo_vd2_vd2(o, ddrec_vd2_vd2(x), x); u = vadd_vd_vd_vd(x.x, x.y); u = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, u); return u; } static INLINE CONST vdouble atan2k(vdouble y, vdouble x) { vdouble s, t, u; vint q; vopmask p; q = vsel_vi_vd_vi(x, vcast_vi_i(-2)); x = vabs_vd_vd(x); q = vsel_vi_vd_vd_vi_vi(x, y, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); p = vlt_vo_vd_vd(x, y); 
s = vsel_vd_vo_vd_vd(p, vneg_vd_vd(x), y); t = vmax_vd_vd_vd(x, y); s = vdiv_vd_vd_vd(s, t); t = vmul_vd_vd_vd(s, s); #ifdef SPLIT_KERNEL vdouble t2 = vmul_vd_vd_vd(t, t), v; u = vcast_vd_d(-1.88796008463073496563746e-05); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.00110611831486672482563471)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.00889896195887655491740809)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.0254517624932312641616861)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.0407629191276836500001934)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.0523674852303482457616113)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.0666573579361080525984562)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.090908995008245008229153)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.14285714266771329383765)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.333333333333311110369124)); v = vcast_vd_d(0.000209850076645816976906797); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.00370026744188713119232403)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.016599329773529201970117)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.0337852580001353069993897)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.0466667150077840625632675)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.0587666392926673580854313)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.0769219538311769618355029)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.111111105648261418443745)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.199999999996591265594148)); u = vmla_vd_vd_vd_vd(v, t, u); #else u = vcast_vd_d(-1.88796008463073496563746e-05); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.000209850076645816976906797)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.00110611831486672482563471)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.00370026744188713119232403)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.00889896195887655491740809)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.016599329773529201970117)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0254517624932312641616861)); u = vmla_vd_vd_vd_vd(u, t, 
vcast_vd_d(0.0337852580001353069993897)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0407629191276836500001934)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0466667150077840625632675)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0523674852303482457616113)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0587666392926673580854313)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0666573579361080525984562)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0769219538311769618355029)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.090908995008245008229153)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.111111105648261418443745)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.14285714266771329383765)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.199999999996591265594148)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.333333333333311110369124)); #endif t = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(t, u), s); t = vmla_vd_vd_vd_vd(vcast_vd_vi(q), vcast_vd_d(M_PI/2), t); return t; } static INLINE CONST vdouble2 atan2k_u1(vdouble2 y, vdouble2 x) { vdouble u; vdouble2 s, t; vint q; vopmask p; q = vsel_vi_vd_vi(x.x, vcast_vi_i(-2)); p = vlt_vo_vd_vd(x.x, vcast_vd_d(0)); vmask b = vand_vm_vo64_vm(p, vreinterpret_vm_vd(vcast_vd_d(-0.0))); x.x = vreinterpret_vd_vm(vxor_vm_vm_vm(b, vreinterpret_vm_vd(x.x))); x.y = vreinterpret_vd_vm(vxor_vm_vm_vm(b, vreinterpret_vm_vd(x.y))); q = vsel_vi_vd_vd_vi_vi(x.x, y.x, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); p = vlt_vo_vd_vd(x.x, y.x); s = vsel_vd2_vo_vd2_vd2(p, ddneg_vd2_vd2(x), y); t = vsel_vd2_vo_vd2_vd2(p, y, x); s = dddiv_vd2_vd2_vd2(s, t); t = ddsqu_vd2_vd2(s); t = ddnormalize_vd2_vd2(t); #ifdef SPLIT_KERNEL vdouble tx3 = vmul_vd_vd_vd(vmul_vd_vd_vd(t.x, t.x), t.x), v; u = vcast_vd_d(0.00070557664296393412389774); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.00251865614498713360352999)); u = vmla_vd_vd_vd_vd(u, tx3, vcast_vd_d(0.0208024799924145797902497)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0289002344784740315686289)); u = vmla_vd_vd_vd_vd(u, tx3, vcast_vd_d(0.0470843011653283988193763)); 
u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0524914210588448421068719)); u = vmla_vd_vd_vd_vd(u, tx3, vcast_vd_d(0.0769225330296203768654095)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0909090442773387574781907)); u = vmla_vd_vd_vd_vd(u, tx3, vcast_vd_d(0.199999999997977351284817)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.333333333333317605173818)); v = vcast_vd_d(1.06298484191448746607415e-05); v = vmla_vd_vd_vd_vd(v, t.x, vcast_vd_d(-0.000125620649967286867384336)); v = vmla_vd_vd_vd_vd(v, tx3, vcast_vd_d(0.00646262899036991172313504)); v = vmla_vd_vd_vd_vd(v, t.x, vcast_vd_d(-0.0128281333663399031014274)); v = vmla_vd_vd_vd_vd(v, tx3, vcast_vd_d(0.0359785005035104590853656)); v = vmla_vd_vd_vd_vd(v, t.x, vcast_vd_d(-0.041848579703592507506027)); v = vmla_vd_vd_vd_vd(v, tx3, vcast_vd_d(0.0587946590969581003860434)); v = vmla_vd_vd_vd_vd(v, t.x, vcast_vd_d(-0.0666620884778795497194182)); v = vmla_vd_vd_vd_vd(v, tx3, vcast_vd_d(0.111111108376896236538123)); v = vmla_vd_vd_vd_vd(v, t.x, vcast_vd_d(-0.142857142756268568062339)); u = vmla_vd_vd_vd_vd(v, vmul_vd_vd_vd(t.x, t.x), u); #else u = vcast_vd_d(1.06298484191448746607415e-05); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.000125620649967286867384336)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.00070557664296393412389774)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.00251865614498713360352999)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.00646262899036991172313504)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0128281333663399031014274)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0208024799924145797902497)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0289002344784740315686289)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0359785005035104590853656)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.041848579703592507506027)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0470843011653283988193763)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0524914210588448421068719)); u = vmla_vd_vd_vd_vd(u, t.x, 
vcast_vd_d(0.0587946590969581003860434)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0666620884778795497194182)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.0769225330296203768654095)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.0909090442773387574781907)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.111111108376896236538123)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.142857142756268568062339)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(0.199999999997977351284817)); u = vmla_vd_vd_vd_vd(u, t.x, vcast_vd_d(-0.333333333333317605173818)); #endif t = ddmul_vd2_vd2_vd(t, u); t = ddmul_vd2_vd2_vd2(s, ddadd_vd2_vd_vd2(vcast_vd_d(1), t)); t = ddadd_vd2_vd2_vd2(ddmul_vd2_vd2_vd(vcast_vd2_d_d(1.570796326794896557998982, 6.12323399573676603586882e-17), vcast_vd_vi(q)), t); return t; } static INLINE CONST vdouble visinf2_vd_vd_vd(vdouble d, vdouble m) { return vreinterpret_vd_vm(vand_vm_vo64_vm(visinf_vo_vd(d), vor_vm_vm_vm(vand_vm_vm_vm(vreinterpret_vm_vd(d), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(m)))); } EXPORT CONST vdouble xatan2(vdouble y, vdouble x) { vdouble r = atan2k(vabs_vd_vd(y), x); r = vmulsign_vd_vd_vd(r, x); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/2), x))), r); r = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/4), x))), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, vcast_vd_d(0.0)), vreinterpret_vd_vm(vand_vm_vo64_vm(vsignbit_vo_vd(x), vreinterpret_vm_vd(vcast_vd_d(M_PI)))), r); r = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(vmulsign_vd_vd_vd(r, y)))); return r; } EXPORT CONST vdouble xatan2_u1(vdouble y, vdouble x) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(5.5626846462680083984e-309)); // nexttoward((1.0 / DBL_MAX), 1) x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(1ULL << 
53)), x); y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(1ULL << 53)), y); vdouble2 d = atan2k_u1(vcast_vd2_vd_vd(vabs_vd_vd(y), vcast_vd_d(0)), vcast_vd2_vd_vd(x, vcast_vd_d(0))); vdouble r = vadd_vd_vd_vd(d.x, d.y); r = vmulsign_vd_vd_vd(r, x); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/2), x))), r); r = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), visinf2_vd_vd_vd(x, vmulsign_vd_vd_vd(vcast_vd_d(M_PI/4), x))), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, vcast_vd_d(0.0)), vreinterpret_vd_vm(vand_vm_vo64_vm(vsignbit_vo_vd(x), vreinterpret_vm_vd(vcast_vd_d(M_PI)))), r); r = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(vmulsign_vd_vd_vd(r, y)))); return r; } EXPORT CONST vdouble xasin(vdouble d) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))); vdouble x = vsel_vd_vo_vd_vd(o, vabs_vd_vd(d), vsqrt_vd_vd(x2)), u; #ifdef SPLIT_KERNEL vdouble x4 = vmul_vd_vd_vd(x2, x2), v; u = vcast_vd_d(-0.1581918243329996643e-1); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.6606077476277170610e-2)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.1388715184501609218e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.2237176181932048341e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.4464285681377102438e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.1666666666666497543e+0)); v = vcast_vd_d(+0.3161587650653934628e-1); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1929045477267910674e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1215360525577377331e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1735956991223614604e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.3038195928038132237e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.7500000000378581611e-1)); 
u = vmla_vd_vd_vd_vd(v, x2, u); #else u = vcast_vd_d(+0.3161587650653934628e-1); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(-0.1581918243329996643e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1929045477267910674e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.6606077476277170610e-2)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1215360525577377331e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1388715184501609218e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1735956991223614604e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.2237176181932048341e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.3038195928038132237e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.4464285681377102438e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.7500000000378581611e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1666666666666497543e+0)); #endif u = vmla_vd_vd_vd_vd(u, vmul_vd_vd_vd(x, x2), x); vdouble r = vsel_vd_vo_vd_vd(o, u, vmla_vd_vd_vd_vd(u, vcast_vd_d(-2), vcast_vd_d(M_PI/2))); return vmulsign_vd_vd_vd(r, d); } EXPORT CONST vdouble xasin_u1(vdouble d) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; vdouble2 x = vsel_vd2_vo_vd2_vd2(o, vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), ddsqrt_vd2_vd(x2)); x = vsel_vd2_vo_vd2_vd2(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd2_d_d(0, 0), x); #ifdef SPLIT_KERNEL vdouble x4 = vmul_vd_vd_vd(x2, x2), v; u = vcast_vd_d(-0.1581918243329996643e-1); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.6606077476277170610e-2)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.1388715184501609218e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.2237176181932048341e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.4464285681377102438e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.1666666666666497543e+0)); v = vcast_vd_d(+0.3161587650653934628e-1); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1929045477267910674e-1)); 
v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1215360525577377331e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1735956991223614604e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.3038195928038132237e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.7500000000378581611e-1)); u = vmla_vd_vd_vd_vd(v, x2, u); #else u = vcast_vd_d(+0.3161587650653934628e-1); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(-0.1581918243329996643e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1929045477267910674e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.6606077476277170610e-2)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1215360525577377331e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1388715184501609218e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1735956991223614604e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.2237176181932048341e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.3038195928038132237e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.4464285681377102438e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.7500000000378581611e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1666666666666497543e+0)); #endif u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, x.x)); vdouble2 y = ddsub_vd2_vd2_vd(ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116/4, 1.2246467991473532072e-16/4), x), u); vdouble r = vsel_vd_vo_vd_vd(o, vadd_vd_vd_vd(u, x.x), vmul_vd_vd_vd(vadd_vd_vd_vd(y.x, y.y), vcast_vd_d(2))); return vmulsign_vd_vd_vd(r, d); } EXPORT CONST vdouble xacos(vdouble d) { vopmask o = vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; vdouble x = vsel_vd_vo_vd_vd(o, vabs_vd_vd(d), vsqrt_vd_vd(x2)); x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd_d(0), x); #ifdef SPLIT_KERNEL vdouble x4 = vmul_vd_vd_vd(x2, x2), v; u = vcast_vd_d(-0.1581918243329996643e-1); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.6606077476277170610e-2)); u = 
vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.1388715184501609218e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.2237176181932048341e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.4464285681377102438e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.1666666666666497543e+0)); v = vcast_vd_d(+0.3161587650653934628e-1); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1929045477267910674e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1215360525577377331e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1735956991223614604e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.3038195928038132237e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.7500000000378581611e-1)); u = vmla_vd_vd_vd_vd(v, x2, u); #else u = vcast_vd_d(+0.3161587650653934628e-1); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(-0.1581918243329996643e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1929045477267910674e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.6606077476277170610e-2)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1215360525577377331e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1388715184501609218e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1735956991223614604e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.2237176181932048341e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.3038195928038132237e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.4464285681377102438e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.7500000000378581611e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1666666666666497543e+0)); #endif u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, x)); vdouble y = vsub_vd_vd_vd(vcast_vd_d(M_PI/2), vadd_vd_vd_vd(vmulsign_vd_vd_vd(x, d), vmulsign_vd_vd_vd(u, d))); x = vadd_vd_vd_vd(x, u); vdouble r = vsel_vd_vo_vd_vd(o, y, vmul_vd_vd_vd(x, vcast_vd_d(2))); return vsel_vd_vo_vd_vd(vandnot_vo_vo_vo(o, vlt_vo_vd_vd(d, vcast_vd_d(0))), ddadd_vd2_vd2_vd(vcast_vd2_d_d(3.141592653589793116, 1.2246467991473532072e-16), vneg_vd_vd(r)).x, r); } EXPORT CONST vdouble xacos_u1(vdouble d) { vopmask o = 
vlt_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(0.5)); vdouble x2 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, d), vmul_vd_vd_vd(vsub_vd_vd_vd(vcast_vd_d(1), vabs_vd_vd(d)), vcast_vd_d(0.5))), u; vdouble2 x = vsel_vd2_vo_vd2_vd2(o, vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), ddsqrt_vd2_vd(x2)); x = vsel_vd2_vo_vd2_vd2(veq_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1.0)), vcast_vd2_d_d(0, 0), x); #ifdef SPLIT_KERNEL vdouble x4 = vmul_vd_vd_vd(x2, x2), v; u = vcast_vd_d(-0.1581918243329996643e-1); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.6606077476277170610e-2)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.1388715184501609218e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.2237176181932048341e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.4464285681377102438e-1)); u = vmla_vd_vd_vd_vd(u, x4, vcast_vd_d(+0.1666666666666497543e+0)); v = vcast_vd_d(+0.3161587650653934628e-1); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1929045477267910674e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1215360525577377331e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.1735956991223614604e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.3038195928038132237e-1)); v = vmla_vd_vd_vd_vd(v, x4, vcast_vd_d(+0.7500000000378581611e-1)); u = vmla_vd_vd_vd_vd(v, x2, u); #else u = vcast_vd_d(+0.3161587650653934628e-1); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(-0.1581918243329996643e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1929045477267910674e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.6606077476277170610e-2)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1215360525577377331e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1388715184501609218e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.1735956991223614604e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.2237176181932048341e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.3038195928038132237e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.4464285681377102438e-1)); u = vmla_vd_vd_vd_vd(u, x2, vcast_vd_d(+0.7500000000378581611e-1)); u = vmla_vd_vd_vd_vd(u, x2, 
vcast_vd_d(+0.1666666666666497543e+0)); #endif u = vmul_vd_vd_vd(u, vmul_vd_vd_vd(x2, x.x)); vdouble2 y = ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116/2, 1.2246467991473532072e-16/2), ddadd_vd2_vd_vd(vmulsign_vd_vd_vd(x.x, d), vmulsign_vd_vd_vd(u, d))); x = ddadd_vd2_vd2_vd(x, u); y = vsel_vd2_vo_vd2_vd2(o, y, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); y = vsel_vd2_vo_vd2_vd2(vandnot_vo_vo_vo(o, vlt_vo_vd_vd(d, vcast_vd_d(0))), ddsub_vd2_vd2_vd2(vcast_vd2_d_d(3.141592653589793116, 1.2246467991473532072e-16), y), y); return vadd_vd_vd_vd(y.x, y.y); } EXPORT CONST vdouble xatan_u1(vdouble d) { vdouble2 d2 = atan2k_u1(vcast_vd2_vd_vd(vabs_vd_vd(d), vcast_vd_d(0)), vcast_vd2_d_d(1, 0)); vdouble r = vadd_vd_vd_vd(d2.x, d2.y); r = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vcast_vd_d(1.570796326794896557998982), r); return vmulsign_vd_vd_vd(r, d); } EXPORT CONST vdouble xatan(vdouble s) { vdouble t, u; vint q; q = vsel_vi_vd_vi(s, vcast_vi_i(2)); s = vabs_vd_vd(s); q = vsel_vi_vd_vd_vi_vi(vcast_vd_d(1), s, vadd_vi_vi_vi(q, vcast_vi_i(1)), q); s = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vcast_vd_d(1), s), vrec_vd_vd(s), s); t = vmul_vd_vd_vd(s, s); #ifdef SPLIT_KERNEL vdouble t2 = vmul_vd_vd_vd(t, t), v; u = vcast_vd_d(-1.88796008463073496563746e-05); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.00110611831486672482563471)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.00889896195887655491740809)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.0254517624932312641616861)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.0407629191276836500001934)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.0523674852303482457616113)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.0666573579361080525984562)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.090908995008245008229153)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.14285714266771329383765)); u = vmla_vd_vd_vd_vd(u, t2, vcast_vd_d(-0.333333333333311110369124)); v = vcast_vd_d(0.000209850076645816976906797); v = vmla_vd_vd_vd_vd(v, t2, 
vcast_vd_d(0.00370026744188713119232403)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.016599329773529201970117)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.0337852580001353069993897)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.0466667150077840625632675)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.0587666392926673580854313)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.0769219538311769618355029)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.111111105648261418443745)); v = vmla_vd_vd_vd_vd(v, t2, vcast_vd_d(0.199999999996591265594148)); /* xatan, SPLIT_KERNEL branch: merge the two chains in t2 into one value, u = v * t + u (assuming vmla(a, b, c) = a * b + c) */ u = vmla_vd_vd_vd_vd(v, t, u); #else /* xatan, SPLIT_KERNEL not defined: one chain in t using the same coefficients as the split branch, interleaved so that their signs alternate */ u = vcast_vd_d(-1.88796008463073496563746e-05); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.000209850076645816976906797)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.00110611831486672482563471)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.00370026744188713119232403)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.00889896195887655491740809)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.016599329773529201970117)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0254517624932312641616861)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0337852580001353069993897)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0407629191276836500001934)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0466667150077840625632675)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0523674852303482457616113)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0587666392926673580854313)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.0666573579361080525984562)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.0769219538311769618355029)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.090908995008245008229153)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.111111105648261418443745)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.14285714266771329383765)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(0.199999999996591265594148)); u = vmla_vd_vd_vd_vd(u, t, vcast_vd_d(-0.333333333333311110369124)); #endif /* both branches: t becomes s * (t * u) + s, the arctangent of the reduced argument s */ t = vmla_vd_vd_vd_vd(s, vmul_vd_vd_vd(t, u), s); /* lanes whose q has bit 0 set take the other operand of the select that starts here */ t = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, 
vcast_vi_i(1)), vcast_vi_i(1))), vsub_vd_vd_vd(vcast_vd_d(M_PI/2), t), t); /* xatan: lanes whose q has bit 1 set get their sign bit flipped (XOR with the bits of -0.0) */ t = vreinterpret_vd_vm(vxor_vm_vm_vm(vand_vm_vo64_vm(vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(q, vcast_vi_i(2)), vcast_vi_i(2))), vreinterpret_vm_vd(vcast_vd_d(-0.0))), vreinterpret_vm_vd(t))); return t; }
/*
 * xlog: natural logarithm of every lane of d, evaluated in plain double
 * precision.
 * Without ENABLE_AVX512F: lanes below DBL_MIN are first multiplied by
 * (double)(1LL << 32) * (double)(1LL << 32), i.e. 2^64, and 64 is subtracted
 * from their exponent e afterwards. e is obtained with vilogb2k_vi_vd from
 * d * (1.0/0.75) and m = vldexp3_vd_vd_vi(d, -e).
 * With ENABLE_AVX512F: e and m come from vgetexp_vd_vd and vgetmant_vd_vd,
 * and e is forced to 1024.0 in lanes where vgetexp returned +infinity.
 * In both cases x = (m - 1) / (m + 1), a polynomial t in x * x is evaluated
 * and the result is x * t + 0.693147180559945286226764 * e (assuming
 * vmla(a, b, c) = a * b + c).
 * Special cases without ENABLE_AVX512F, applied in this order so that later
 * selects win: d == +inf gives +inf; d < 0 or d is NaN gives NaN; d == 0
 * gives -inf. With ENABLE_AVX512F they are delegated to
 * vfixup_vd_vd_vd_vi2_i.
 * NOTE(review): the fixup table used here is (5 << (5*4)) only, whereas
 * xlog_u1, xlog10 and xlog2 in this file pass a four-entry table; confirm
 * that the difference is intended.
 */
EXPORT CONST vdouble xlog(vdouble d) { vdouble x, x2; vdouble t, m; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(1LL << 32) * (double)(1LL << 32))), d); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); m = vgetmant_vd_vd(d); #endif x = vdiv_vd_vd_vd(vadd_vd_vd_vd(vcast_vd_d(-1), m), vadd_vd_vd_vd(vcast_vd_d(1), m)); x2 = vmul_vd_vd_vd(x, x); t = vcast_vd_d(0.153487338491425068243146); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.152519917006351951593857)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.181863266251982985677316)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.222221366518767365905163)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.285714294746548025383248)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.399999999950799600689777)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.6666666666667778740063)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(2)); #ifndef ENABLE_AVX512F x = vmla_vd_vd_vd_vd(x, t, vmul_vd_vd_vd(vcast_vd_d(0.693147180559945286226764), vcast_vd_vi(e))); x = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), x); x = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), x); x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), x); #else x = vmla_vd_vd_vd_vd(x, t, vmul_vd_vd_vd(vcast_vd_d(0.693147180559945286226764), e)); x = vfixup_vd_vd_vd_vi2_i(x, 
d, vcast_vi2_i((5 << (5*4))), 0); #endif return x; }
/*
 * xexp: e raised to every lane of d, evaluated in plain double precision.
 * u = rint(d * R_LN2) is the number of multiples to remove and q is its
 * integer form. The reduced argument is s = d - u * L2U - u * L2L, formed in
 * two multiply-add steps (assuming vmla(a, b, c) = a * b + c). R_LN2, L2U
 * and L2L are defined outside this view; presumably 1 / ln 2 and a high/low
 * split of ln 2 -- confirm.
 * The preprocessor selects one of three polynomial evaluations:
 *   ENABLE_FMA_DP and SPLIT_KERNEL: two vfma chains in s * s that are merged
 *     and then extended by two further vfma steps with constant 1;
 *   ENABLE_FMA_DP only: one vfma chain in s ending with the same two steps;
 *   neither: one vmla chain in s, finished as 1 + (s * s * u + s).
 * The value is then scaled with vldexp2_vd_vd_vi(u, q). Lanes with
 * d > 709.78271114955742909217217426 return +infinity and lanes with
 * d < -1000 have all bits cleared, i.e. return +0.0.
 */
EXPORT CONST vdouble xexp(vdouble d) { vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(R_LN2))), s; vint q = vrint_vi_vd(u); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2U), d); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2L), s); #ifdef ENABLE_FMA_DP #ifdef SPLIT_KERNEL vdouble s2 = vmul_vd_vd_vd(s, s), v; u = vcast_vd_d(+0.2081276378237164457e-8); u = vfma_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.2755762628169491192e-6)); u = vfma_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.2480158687479686264e-4)); u = vfma_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.1388888888914497797e-2)); u = vfma_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.4166666666666602598e-1)); u = vfma_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.5000000000000000000e+0)); v = vcast_vd_d(+0.2511210703042288022e-7); v = vfma_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.2755723402025388239e-5)); v = vfma_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.1984126989855865850e-3)); v = vfma_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.8333333333314938210e-2)); v = vfma_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.1666666666666669072e+0)); u = vmla_vd_vd_vd_vd(v, s, u); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1000000000000000000e+1)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1000000000000000000e+1)); #else // #ifdef SPLIT_KERNEL u = vcast_vd_d(+0.2081276378237164457e-8); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2511210703042288022e-7)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2755762628169491192e-6)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2755723402025388239e-5)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2480158687479686264e-4)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1984126989855865850e-3)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1388888888914497797e-2)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.8333333333314938210e-2)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.4166666666666602598e-1)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1666666666666669072e+0)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5000000000000000000e+0)); u = vfma_vd_vd_vd_vd(u, s, 
vcast_vd_d(+0.1000000000000000000e+1)); u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1000000000000000000e+1)); #endif // #ifdef SPLIT_KERNEL #else // #ifdef ENABLE_FMA_DP u = vcast_vd_d(2.08860621107283687536341e-09); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.51112930892876518610661e-08)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573911234900471893338e-07)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75572362911928827629423e-06)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.4801587159235472998791e-05)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000198412698960509205564975)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00138888888889774492207962)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00833333333331652721664984)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665047591422)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.166666666666666851703837)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.5)); u = vadd_vd_vd_vd(vcast_vd_d(1), vmla_vd_vd_vd_vd(vmul_vd_vd_vd(s, s), u, s)); #endif // #ifdef ENABLE_FMA_DP /* xexp, all branches: scale by q, then clamp: d above 709.78271114955742909217217426 gives +infinity, d below -1000 gives +0.0 (all bits cleared) */ u = vldexp2_vd_vd_vi(u, q); u = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(709.78271114955742909217217426)), vcast_vd_d(SLEEF_INFINITY), u); u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-1000)), vreinterpret_vm_vd(u))); return u; } /* expm1k: file-local helper computing e^d - 1 for every lane of d. The argument is reduced as u = rint(d * R_LN2), s = d - u * L2U - u * L2L (assuming vmla(a, b, c) = a * b + c); a vmla chain in s followed by s * s * u + s then approximates e^s - 1. Lanes whose integer multiple q is 0 return that value unchanged; all other lanes return vldexp2(u + 1, q) - 1. No overflow, underflow or NaN fixups are applied inside this helper. */ static INLINE CONST vdouble expm1k(vdouble d) { vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(R_LN2))), s; vint q = vrint_vi_vd(u); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2U), d); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L2L), s); u = vcast_vd_d(2.08860621107283687536341e-09); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.51112930892876518610661e-08)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75573911234900471893338e-07)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.75572362911928827629423e-06)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(2.4801587159235472998791e-05)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.000198412698960509205564975)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.00138888888889774492207962)); u = vmla_vd_vd_vd_vd(u, s, 
vcast_vd_d(0.00833333333331652721664984)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.0416666666666665047591422)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.166666666666666851703837)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(0.5)); /* expm1k: u = s * s * u + s (assuming vmla(a, b, c) = a * b + c) */ u = vmla_vd_vd_vd_vd(vmul_vd_vd_vd(s, s), u, s); /* expm1k: lanes with q == 0 keep u, the others use vldexp2(u + 1, q) - 1 */ u = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(q, vcast_vi_i(0))), u, vsub_vd_vd_vd(vldexp2_vd_vd_vi(vadd_vd_vd_vd(u, vcast_vd_d(1)), q), vcast_vd_d(1))); return u; }
/*
 * logk: file-local natural logarithm returning a vdouble2 pair (the name
 * prefix "dd" of the helpers used suggests double-double arithmetic).
 * The exponent/mantissa split has two variants. Without ENABLE_AVX512F,
 * lanes below DBL_MIN are pre-multiplied by 2^64
 * ((double)(1LL << 32) * (double)(1LL << 32)), e = vilogb2k_vi_vd(d * (1.0/0.75)),
 * m = vldexp3_vd_vd_vi(d, -e), and 64 is subtracted from e in the
 * pre-multiplied lanes. With ENABLE_AVX512F, e and m come from
 * vgetexp_vd_vd / vgetmant_vd_vd, with e forced to 1024.0 where vgetexp
 * returned +infinity.
 * Then x = (m - 1) / (m + 1) and x2 = x * x are formed as pairs, a
 * polynomial t in x2.x is evaluated, and the result is
 *   (0.693147180559945286226764, 2.319046813846299558417771e-17) * e
 *     + 2 * x + (x2 * x) * (x2 * t + c)
 * with c = (0.666666666666666629659233, 3.80554962542412056336616e-17).
 * No fixups for zero, negative, infinite or NaN input are applied here.
 */
static INLINE CONST vdouble2 logk(vdouble d) { vdouble2 x, x2, s; vdouble t, m; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(1LL << 32) * (double)(1LL << 32))), d); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); m = vgetmant_vd_vd(d); #endif x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); x2 = ddsqu_vd2_vd2(x); t = vcast_vd_d(0.116255524079935043668677); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.103239680901072952701192)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.117754809412463995466069)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.13332981086846273921509)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.153846227114512262845736)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.181818180850050775676507)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.222222222230083560345903)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.285714285714249172087875)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.400000000000000077715612)); vdouble2 c = vcast_vd2_d_d(0.666666666666666629659233, 3.80554962542412056336616e-17); #ifndef ENABLE_AVX512F s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), 
vcast_vd_vi(e)); #else s = ddmul_vd2_vd2_vd(vcast_vd2_vd_vd(vcast_vd_d(0.693147180559945286226764), vcast_vd_d(2.319046813846299558417771e-17)), e); #endif /* logk: add 2 * x, then (x2 * x) * (x2 * t + c) */ s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd2(ddmul_vd2_vd2_vd2(x2, x), ddadd2_vd2_vd2_vd2(ddmul_vd2_vd2_vd(x2, t), c))); return s; }
/*
 * xlog_u1: natural logarithm of every lane of d, using vdouble2 pair
 * arithmetic for the final sum.
 * The exponent/mantissa split is the same in form as in xlog: without
 * ENABLE_AVX512F lanes below DBL_MIN are pre-multiplied by 2^64 and 64 is
 * subtracted from e afterwards; with ENABLE_AVX512F vgetexp_vd_vd and
 * vgetmant_vd_vd are used.
 * x = (m - 1) / (m + 1) is computed as a pair, x2 = x.x * x.x as a plain
 * double, and a polynomial t in x2 is evaluated. The pair sum is
 *   (0.693147180559945286226764, 2.319046813846299558417771e-17) * e
 *     + 2 * x + (x2 * x.x) * t
 * and the return value is the sum of its two halves.
 * Special cases without ENABLE_AVX512F, applied in this order: d == +inf
 * gives +inf; d < 0 or NaN gives NaN; d == 0 gives -inf. With
 * ENABLE_AVX512F they are delegated to vfixup_vd_vd_vd_vi2_i.
 * NOTE(review): "_u1" presumably marks the 1.0-ULP accuracy variant;
 * confirm against the library documentation.
 */
EXPORT CONST vdouble xlog_u1(vdouble d) { vdouble2 x; vdouble t, m, x2; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(1LL << 32) * (double)(1LL << 32))), d); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); m = vgetmant_vd_vd(d); #endif x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); x2 = vmul_vd_vd_vd(x.x, x.x); t = vcast_vd_d(0.1532076988502701353e+0); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.1525629051003428716e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.1818605932937785996e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.2222214519839380009e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.2857142932794299317e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.3999999999635251990e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.6666666666667333541e+0)); #ifndef ENABLE_AVX512F vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); #else vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), e); #endif s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, x.x), t)); vdouble r = vadd_vd_vd_vd(s.x, s.y); #ifndef ENABLE_AVX512F r = 
vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r); #else r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); #endif return r; } static INLINE CONST vdouble expk(vdouble2 d) { vdouble u = vmul_vd_vd_vd(vadd_vd_vd_vd(d.x, d.y), vcast_vd_d(R_LN2)); vdouble dq = vrint_vd_vd(u); vint q = vrint_vi_vd(dq); vdouble2 s, t; s = ddadd2_vd2_vd2_vd(d, vmul_vd_vd_vd(dq, vcast_vd_d(-L2U))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dq, vcast_vd_d(-L2L))); s = ddnormalize_vd2_vd2(s); u = vcast_vd_d(2.51069683420950419527139e-08); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.76286166770270649116855e-07)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.75572496725023574143864e-06)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(2.48014973989819794114153e-05)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.000198412698809069797676111)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0013888888939977128960529)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.00833333333332371417601081)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.0416666666665409524128449)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.166666666666666740681535)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(0.500000000000000999200722)); t = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(ddsqu_vd2_vd2(s), u)); t = ddadd_vd2_vd_vd2(vcast_vd_d(1), t); u = vadd_vd_vd_vd(t.x, t.y); u = vldexp2_vd_vd_vi(u, q); u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d.x, vcast_vd_d(-1000)), vreinterpret_vm_vd(u))); return u; } EXPORT CONST vdouble xpow(vdouble x, vdouble y) { #if 1 vopmask yisint = visint_vo_vd(y); vopmask yisodd = vand_vo_vo_vo(visodd_vo_vd(y), yisint); vdouble2 d = ddmul_vd2_vd2_vd(logk(vabs_vd_vd(x)), y); vdouble result = expk(d); result = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d.x, 
vcast_vd_d(709.78271114955742909217217426)), vcast_vd_d(SLEEF_INFINITY), result); result = vmul_vd_vd_vd(result, vsel_vd_vo_vd_vd(vgt_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(1), vsel_vd_vo_vd_vd(yisint, vsel_vd_vo_vd_vd(yisodd, vcast_vd_d(-1.0), vcast_vd_d(1)), vcast_vd_d(SLEEF_NAN)))); vdouble efx = vmulsign_vd_vd_vd(vsub_vd_vd_vd(vabs_vd_vd(x), vcast_vd_d(1)), y); result = vsel_vd_vo_vd_vd(visinf_vo_vd(y), vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(efx, vcast_vd_d(0.0)), vreinterpret_vm_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(efx, vcast_vd_d(0.0)), vcast_vd_d(1.0), vcast_vd_d(SLEEF_INFINITY))))), result); result = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), veq_vo_vd_vd(x, vcast_vd_d(0.0))), vmul_vd_vd_vd(vsel_vd_vo_vd_vd(yisodd, vsign_vd_vd(x), vcast_vd_d(1.0)), vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0.0)), vneg_vd_vd(y), y), vcast_vd_d(0.0)), vreinterpret_vm_vd(vcast_vd_d(SLEEF_INFINITY))))), result); result = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(result))); result = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(y, vcast_vd_d(0)), veq_vo_vd_vd(x, vcast_vd_d(1))), vcast_vd_d(1), result); return result; #else return expk(ddmul_vd2_vd2_vd(logk(x), y)); #endif } static INLINE CONST vdouble2 expk2(vdouble2 d) { vdouble u = vmul_vd_vd_vd(vadd_vd_vd_vd(d.x, d.y), vcast_vd_d(R_LN2)); vdouble dq = vrint_vd_vd(u); vint q = vrint_vi_vd(dq); vdouble2 s, t; s = ddadd2_vd2_vd2_vd(d, vmul_vd_vd_vd(dq, vcast_vd_d(-L2U))); s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(dq, vcast_vd_d(-L2L))); u = vcast_vd_d(+0.1602472219709932072e-9); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(+0.2092255183563157007e-8)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(+0.2505230023782644465e-7)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(+0.2755724800902135303e-6)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(+0.2755731892386044373e-5)); u = vmla_vd_vd_vd_vd(u, s.x, 
vcast_vd_d(+0.2480158735605815065e-4)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(+0.1984126984148071858e-3)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(+0.1388888888886763255e-2)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(+0.8333333333333347095e-2)); u = vmla_vd_vd_vd_vd(u, s.x, vcast_vd_d(+0.4166666666666669905e-1)); /* expk2: continue the evaluation in pair arithmetic: t = s * u + 0.1666..., t = s * t + 0.5, t = s + s * s * t, t = 1 + t */ t = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(s, u), vcast_vd_d(+0.1666666666666666574e+0)); t = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd2(s, t), vcast_vd_d(0.5)); t = ddadd2_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd2(ddsqu_vd2_vd2(s), t)); t = ddadd_vd2_vd_vd2(vcast_vd_d(1), t); /* expk2: scale both halves by q, then clear both halves where d.x < -1000 */ t.x = vldexp2_vd_vd_vi(t.x, q); t.y = vldexp2_vd_vd_vi(t.y, q); t.x = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d.x, vcast_vd_d(-1000)), vreinterpret_vm_vd(t.x))); t.y = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d.x, vcast_vd_d(-1000)), vreinterpret_vm_vd(t.y))); return t; }
/*
 * xsinh: hyperbolic sine of every lane of x.
 * With d = expk2(|x|) as a vdouble2 pair, the value is 0.5 * (d - 1 / d),
 * where the reciprocal and the subtraction are done in pair arithmetic and
 * the two halves are summed before the scaling by 0.5.
 * Lanes with |x| > 710, or where the intermediate value is NaN, are set to
 * +infinity; vmulsign_vd_vd_vd(y, x) then presumably restores the sign of x;
 * finally lanes where x is NaN have every bit set (NaN).
 */
EXPORT CONST vdouble xsinh(vdouble x) { vdouble y = vabs_vd_vd(x); vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); d = ddsub_vd2_vd2_vd2(d, ddrec_vd2_vd2(d)); y = vmul_vd_vd_vd(vadd_vd_vd_vd(d.x, d.y), vcast_vd_d(0.5)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; }
/*
 * xcosh: hyperbolic cosine of every lane of x.
 * With d = expk2(|x|) as a vdouble2 pair, the value is 0.5 * (d + 1 / d).
 * Lanes with |x| > 710, or where the intermediate value is NaN, are set to
 * +infinity; lanes where x is NaN have every bit set (NaN). No sign is
 * re-applied.
 */
EXPORT CONST vdouble xcosh(vdouble x) { vdouble y = vabs_vd_vd(x); vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); d = ddadd_vd2_vd2_vd2(d, ddrec_vd2_vd2(d)); y = vmul_vd_vd_vd(vadd_vd_vd_vd(d.x, d.y), vcast_vd_d(0.5)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(710)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; }
/*
 * xtanh: hyperbolic tangent of every lane of x.
 * With d = expk2(|x|) and e = 1 / d as vdouble2 pairs, the value is
 * (d - e) / (d + e), summed to a plain double. Lanes with
 * |x| > 18.714973875, or where the intermediate value is NaN, are set to
 * 1.0; vmulsign_vd_vd_vd(y, x) then presumably restores the sign of x, and
 * lanes where x is NaN have every bit set (NaN).
 */
EXPORT CONST vdouble xtanh(vdouble x) { vdouble y = vabs_vd_vd(x); vdouble2 d = expk2(vcast_vd2_vd_vd(y, vcast_vd_d(0))); vdouble2 e = ddrec_vd2_vd2(d); d = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd2_vd2(d, 
ddneg_vd2_vd2(e)), ddadd2_vd2_vd2_vd2(d, e)); y = vadd_vd_vd_vd(d.x, d.y); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(18.714973875)), visnan_vo_vd(y)), vcast_vd_d(1.0), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; }
/*
 * xsinh_u35: hyperbolic sine of every lane of x, plain double arithmetic.
 * The project changelog describes the sinh/cosh/tanh additions of this kind
 * as 3.5-ULP versions.
 * With e = expm1k(|x|), the value is ((e + 2) / (e + 1)) * (0.5 * e).
 * Lanes with |x| > 709, or where the intermediate value is NaN, are set to
 * +infinity; vmulsign_vd_vd_vd(y, x) then presumably restores the sign of x,
 * and lanes where x is NaN have every bit set (NaN).
 */
EXPORT CONST vdouble xsinh_u35(vdouble x) { vdouble e = expm1k(vabs_vd_vd(x)); vdouble y = vdiv_vd_vd_vd(vadd_vd_vd_vd(e, vcast_vd_d(2)), vadd_vd_vd_vd(e, vcast_vd_d(1))); y = vmul_vd_vd_vd(y, vmul_vd_vd_vd(vcast_vd_d(0.5), e)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(709)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; }
/*
 * xcosh_u35: hyperbolic cosine of every lane of x, plain double arithmetic
 * (3.5-ULP family per the project changelog).
 * With e = xexp(|x|), the value is 0.5 * e + 0.5 / e (assuming
 * vmla(a, b, c) = a * b + c). Lanes with |x| > 709, or where the
 * intermediate value is NaN, are set to +infinity; lanes where x is NaN
 * have every bit set (NaN).
 */
EXPORT CONST vdouble xcosh_u35(vdouble x) { vdouble e = xexp(vabs_vd_vd(x)); vdouble y = vmla_vd_vd_vd_vd(vcast_vd_d(0.5), e, vdiv_vd_vd_vd(vcast_vd_d(0.5), e)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(709)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; }
/*
 * xtanh_u35: hyperbolic tangent of every lane of x, plain double arithmetic
 * (3.5-ULP family per the project changelog).
 * With d = expm1k(2 * |x|), the value is d / (2 + d). Lanes with
 * |x| > 18.714973875, or where the intermediate value is NaN, are set to
 * 1.0; vmulsign_vd_vd_vd(y, x) then presumably restores the sign of x, and
 * lanes where x is NaN have every bit set (NaN).
 */
EXPORT CONST vdouble xtanh_u35(vdouble x) { vdouble d = expm1k(vmul_vd_vd_vd(vcast_vd_d(2), vabs_vd_vd(x))); vdouble y = vdiv_vd_vd_vd(d, vadd_vd_vd_vd(vcast_vd_d(2), d)); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(18.714973875)), visnan_vo_vd(y)), vcast_vd_d(1.0), y); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; }
/*
 * logk2: file-local natural logarithm of a vdouble2 pair d, returning a
 * vdouble2 pair.
 * e = vilogbk_vi_vd(d.x * (1.0/0.75)); both halves of d are scaled with
 * vldexp2_vd_vd_vi by -e to give m. Then x = (m - 1) / (m + 1) and
 * x2 = x * x in pair arithmetic, a polynomial t in x2.x is evaluated, and
 * the result is
 *   (0.693147180559945286226764, 2.319046813846299558417771e-17) * e
 *     + 2 * x + (x2 * x) * t.
 * Unlike logk there is no pre-scaling of lanes below DBL_MIN and no
 * ENABLE_AVX512F variant; no special-case fixups are applied here.
 */
static INLINE CONST vdouble2 logk2(vdouble2 d) { vdouble2 x, x2, m, s; vdouble t; vint e; e = vilogbk_vi_vd(vmul_vd_vd_vd(d.x, vcast_vd_d(1.0/0.75))); m.x = vldexp2_vd_vd_vi(d.x, vneg_vi_vi(e)); m.y = vldexp2_vd_vd_vi(d.y, vneg_vi_vi(e)); x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(m, vcast_vd_d(-1)), ddadd2_vd2_vd2_vd(m, vcast_vd_d(1))); x2 = ddsqu_vd2_vd2(x); t = 
vcast_vd_d(0.13860436390467167910856); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.131699838841615374240845)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.153914168346271945653214)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.181816523941564611721589)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.22222224632662035403996)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.285714285511134091777308)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.400000000000914013309483)); t = vmla_vd_vd_vd_vd(t, x2.x, vcast_vd_d(0.666666666666664853302393)); /* logk2: s = (ln 2 constant pair) * e + 2 * x + (x2 * x) * t */ s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd(ddmul_vd2_vd2_vd2(x2, x), t)); return s; }
/*
 * xasinh: inverse hyperbolic sine of every lane of x, computed as
 * logk2(sqrt(x * x + 1) + x) in vdouble2 pair arithmetic.
 * To form the square root, lanes with |x| > 1 (mask o) start from 1 / x and
 * multiply sqrt((1 / x)^2 + 1) by |x| afterwards, while the other lanes use
 * sqrt(|x|^2 + 1) directly. x itself (with its sign) is then added, the pair
 * is normalized and passed to logk2, and the two halves are summed.
 * Lanes with |x| > SQRT_DBL_MAX, or where the intermediate value is NaN,
 * are set to SLEEF_INFINITY passed through vmulsign with x (presumably
 * infinity with the sign of x). Lanes where x is NaN have every bit set
 * (NaN), and lanes where x is -0.0 return -0.0.
 */
EXPORT CONST vdouble xasinh(vdouble x) { vdouble y = vabs_vd_vd(x); vopmask o = vgt_vo_vd_vd(y, vcast_vd_d(1)); vdouble2 d; d = vsel_vd2_vo_vd2_vd2(o, ddrec_vd2_vd(x), vcast_vd2_vd_vd(y, vcast_vd_d(0))); d = ddsqrt_vd2_vd2(ddadd2_vd2_vd2_vd(ddsqu_vd2_vd2(d), vcast_vd_d(1))); d = vsel_vd2_vo_vd2_vd2(o, ddmul_vd2_vd2_vd(d, y), d); d = logk2(ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd(d, x))); y = vadd_vd_vd_vd(d.x, d.y); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(SQRT_DBL_MAX)), visnan_vo_vd(y)), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), x), y); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); y = vsel_vd_vo_vd_vd(visnegzero_vo_vd(x), vcast_vd_d(-0.0), y); return y; }
/*
 * xacosh: inverse hyperbolic cosine of every lane of x, computed as
 * logk2(sqrt(x + 1) * sqrt(x - 1) + x) in vdouble2 pair arithmetic, with
 * the two halves of the result summed.
 * Fixups, in this order: lanes with |x| > SQRT_DBL_MAX, or where the
 * intermediate value is NaN, are set to +infinity; lanes with x == 1.0 have
 * all bits cleared (+0.0); lanes with x < 1.0 have every bit set (NaN);
 * lanes where x is NaN have every bit set (NaN).
 */
EXPORT CONST vdouble xacosh(vdouble x) { vdouble2 d = logk2(ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddsqrt_vd2_vd2(ddadd2_vd2_vd_vd(x, vcast_vd_d(1))), ddsqrt_vd2_vd2(ddadd2_vd2_vd_vd(x, vcast_vd_d(-1)))), x)); vdouble y = vadd_vd_vd_vd(d.x, d.y); y = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vgt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(SQRT_DBL_MAX)), visnan_vo_vd(y)), vcast_vd_d(SLEEF_INFINITY), y); y = vreinterpret_vd_vm(vandnot_vm_vo64_vm(veq_vo_vd_vd(x, vcast_vd_d(1.0)), 
vreinterpret_vm_vd(y))); /* xacosh: x < 1.0 sets every bit (NaN); NaN x does the same */ y = vreinterpret_vd_vm(vor_vm_vo64_vm(vlt_vo_vd_vd(x, vcast_vd_d(1.0)), vreinterpret_vm_vd(y))); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; }
/*
 * xatanh: inverse hyperbolic tangent of every lane of x.
 * With y = |x|, d = logk2((1 + y) / (1 - y)) is computed in vdouble2 pair
 * arithmetic and the candidate value is 0.5 * (d.x + d.y).
 * Lanes with y == 1.0 use +infinity instead, and lanes with y > 1.0 have
 * every bit set (NaN). vmulsign_vd_vd_vd(y, x) then presumably restores the
 * sign of x. Finally lanes where x is infinite, where the value is already
 * NaN, or where x is NaN have every bit set (NaN).
 */
EXPORT CONST vdouble xatanh(vdouble x) { vdouble y = vabs_vd_vd(x); vdouble2 d = logk2(dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(1), y), ddadd2_vd2_vd_vd(vcast_vd_d(1), vneg_vd_vd(y)))); y = vreinterpret_vd_vm(vor_vm_vo64_vm(vgt_vo_vd_vd(y, vcast_vd_d(1.0)), vreinterpret_vm_vd(vsel_vd_vo_vd_vd(veq_vo_vd_vd(y, vcast_vd_d(1.0)), vcast_vd_d(SLEEF_INFINITY), vmul_vd_vd_vd(vadd_vd_vd_vd(d.x, d.y), vcast_vd_d(0.5)))))); y = vmulsign_vd_vd_vd(y, x); y = vreinterpret_vd_vm(vor_vm_vo64_vm(vor_vo_vo_vo(visinf_vo_vd(x), visnan_vo_vd(y)), vreinterpret_vm_vd(y))); y = vreinterpret_vd_vm(vor_vm_vo64_vm(visnan_vo_vd(x), vreinterpret_vm_vd(y))); return y; }
/*
 * xcbrt: cube root of every lane of d, plain double arithmetic.
 * e = vilogbk_vi_vd(|d|) + 1 and d is rescaled with vldexp2_vd_vd_vi by -e.
 * With t = e + 6144, qu = trunc(t / 3) and re = trunc(t - 3 * qu); the
 * scale factor q is 1.0, 1.2599210498948731647672106 (re == 1) or
 * 1.5874010519681994747517056 (re == 2), then scaled by vldexp2 with
 * qu - 2048 and passed through vmulsign with d (presumably taking the sign
 * of d). NOTE(review): 6144 = 3 * 2048, so qu - 2048 equals e / 3 rounded
 * down as long as t is non-negative; presumably that is the purpose of the
 * bias -- confirm.
 * d is then replaced by |d|, a degree-5 polynomial x in d gives a starting
 * value, and two refinement steps follow: x = x - (vmlapn(d, x^4, x)) / 3,
 * then y = d * x * x and y = (y - (2/3) * y * (y * x - 1)) * q. vmlapn is an
 * opaque helper here; presumably a * b - c.
 * Only when ENABLE_AVX512F is defined are explicit fixups applied, using the
 * saved original input s: infinite s gives infinity and zero s gives zero,
 * each passed through vmulsign with s.
 */
EXPORT CONST vdouble xcbrt(vdouble d) { vdouble x, y, q = vcast_vd_d(1.0); vint e, qu, re; vdouble t; #ifdef ENABLE_AVX512F vdouble s = d; #endif e = vadd_vi_vi_vi(vilogbk_vi_vd(vabs_vd_vd(d)), vcast_vi_i(1)); d = vldexp2_vd_vd_vi(d, vneg_vi_vi(e)); t = vadd_vd_vd_vd(vcast_vd_vi(e), vcast_vd_d(6144)); qu = vtruncate_vi_vd(vmul_vd_vd_vd(t, vcast_vd_d(1.0/3.0))); re = vtruncate_vi_vd(vsub_vd_vd_vd(t, vmul_vd_vd_vd(vcast_vd_vi(qu), vcast_vd_d(3)))); q = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(1))), vcast_vd_d(1.2599210498948731647672106), q); q = vsel_vd_vo_vd_vd(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(2))), vcast_vd_d(1.5874010519681994747517056), q); q = vldexp2_vd_vd_vi(q, vsub_vi_vi_vi(qu, vcast_vi_i(2048))); q = vmulsign_vd_vd_vd(q, d); d = vabs_vd_vd(d); x = vcast_vd_d(-0.640245898480692909870982); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.96155103020039511818595)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-5.73353060922947843636166)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(6.03990368989458747961407)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-3.85841935510444988821632)); x = vmla_vd_vd_vd_vd(x, 
d, vcast_vd_d(2.2307275302496609725722)); /* xcbrt: y = x^4, then first refinement of x */ y = vmul_vd_vd_vd(x, x); y = vmul_vd_vd_vd(y, y); x = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vmlapn_vd_vd_vd_vd(d, y, x), vcast_vd_d(1.0 / 3.0))); /* xcbrt: y = d * x * x, second refinement, then multiply by the scale factor q */ y = vmul_vd_vd_vd(vmul_vd_vd_vd(d, x), x); y = vmul_vd_vd_vd(vsub_vd_vd_vd(y, vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(2.0 / 3.0), y), vmla_vd_vd_vd_vd(y, x, vcast_vd_d(-1.0)))), q); #ifdef ENABLE_AVX512F y = vsel_vd_vo_vd_vd(visinf_vo_vd(s), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), s), y); y = vsel_vd_vo_vd_vd(veq_vo_vd_vd(s, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), s), y); #endif return y; }
/*
 * xcbrt_u1: cube root of every lane of d, using vdouble2 pair arithmetic
 * for the final refinement.
 * The exponent handling matches xcbrt: e = vilogbk_vi_vd(|d|) + 1, d is
 * rescaled by -e, t = e + 6144, qu = trunc(t / 3), re = trunc(t - 3 * qu).
 * The scale factor is kept as a pair q2: (1, 0), or
 * (1.2599210498948731907, -2.5899333753005069177e-17) for re == 1, or
 * (1.5874010519681995834, -1.0869008194197822986e-16) for re == 2; both
 * halves are passed through vmulsign with d (presumably taking its sign).
 * After d = |d| the same degree-5 starting polynomial and first refinement
 * as in xcbrt produce x. With z = x, the pair u = x^4 * d - x is summed to
 * y, y = (-2/3 * y) * z, and v = ((z * z + y) * d) * q2 in pair arithmetic.
 * The sum of v's halves is scaled by vldexp2 with qu - 2048.
 * Fixups without ENABLE_AVX512F test the rescaled |d|: infinite gives
 * infinity with the sign of q2.x, zero gives the sign bit of q2.x alone
 * (a signed zero). With ENABLE_AVX512F they test the saved input s instead.
 * NOTE(review): "_u1" presumably marks the 1.0-ULP accuracy variant.
 */
EXPORT CONST vdouble xcbrt_u1(vdouble d) { vdouble x, y, z, t; vdouble2 q2 = vcast_vd2_d_d(1, 0), u, v; vint e, qu, re; #ifdef ENABLE_AVX512F vdouble s = d; #endif e = vadd_vi_vi_vi(vilogbk_vi_vd(vabs_vd_vd(d)), vcast_vi_i(1)); d = vldexp2_vd_vd_vi(d, vneg_vi_vi(e)); t = vadd_vd_vd_vd(vcast_vd_vi(e), vcast_vd_d(6144)); qu = vtruncate_vi_vd(vmul_vd_vd_vd(t, vcast_vd_d(1.0/3.0))); re = vtruncate_vi_vd(vsub_vd_vd_vd(t, vmul_vd_vd_vd(vcast_vd_vi(qu), vcast_vd_d(3)))); q2 = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(1))), vcast_vd2_d_d(1.2599210498948731907, -2.5899333753005069177e-17), q2); q2 = vsel_vd2_vo_vd2_vd2(vcast_vo64_vo32(veq_vo_vi_vi(re, vcast_vi_i(2))), vcast_vd2_d_d(1.5874010519681995834, -1.0869008194197822986e-16), q2); q2.x = vmulsign_vd_vd_vd(q2.x, d); q2.y = vmulsign_vd_vd_vd(q2.y, d); d = vabs_vd_vd(d); x = vcast_vd_d(-0.640245898480692909870982); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.96155103020039511818595)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-5.73353060922947843636166)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(6.03990368989458747961407)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(-3.85841935510444988821632)); x = vmla_vd_vd_vd_vd(x, d, vcast_vd_d(2.2307275302496609725722)); y = vmul_vd_vd_vd(x, x); y = vmul_vd_vd_vd(y, y); x = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vmlapn_vd_vd_vd_vd(d, y, x), vcast_vd_d(1.0 / 3.0))); z = x; u = ddmul_vd2_vd_vd(x, x); u = 
ddmul_vd2_vd2_vd2(u, u); u = ddmul_vd2_vd2_vd(u, d); u = ddadd2_vd2_vd2_vd(u, vneg_vd_vd(x)); y = vadd_vd_vd_vd(u.x, u.y); y = vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(-2.0 / 3.0), y), z); /* xcbrt_u1: v = ((z * z + y) * d) * q2 in pair arithmetic, then scale the summed halves by qu - 2048 */ v = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(z, z), y); v = ddmul_vd2_vd2_vd(v, d); v = ddmul_vd2_vd2_vd2(v, q2); z = vldexp2_vd_vd_vi(vadd_vd_vd_vd(v.x, v.y), vsub_vi_vi_vi(qu, vcast_vi_i(2048))); #ifndef ENABLE_AVX512F z = vsel_vd_vo_vd_vd(visinf_vo_vd(d), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), q2.x), z); z = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vreinterpret_vd_vm(vsignbit_vm_vd(q2.x)), z); #else z = vsel_vd_vo_vd_vd(visinf_vo_vd(s), vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), s), z); z = vsel_vd_vo_vd_vd(veq_vo_vd_vd(s, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), s), z); #endif return z; }
/*
 * xexp2: 2 raised to every lane of d.
 * u = rint(d), q is its integer form and s = d - u is the reduced argument.
 * A polynomial in s is evaluated either as two chains in s * s that are
 * merged (SPLIT_KERNEL) or as a single chain in s; in both cases the last
 * coefficient is 0.6931471805599452862.
 * The final step 1 + u * s uses vfma_vd_vd_vd_vd when ENABLE_FMA_DP is
 * defined, and otherwise a pair product and pair addition whose normalized
 * high half is taken. The value is scaled with vldexp2_vd_vd_vi(u, q).
 * Lanes with d >= 1024 return +infinity; lanes with d < -2000 have all bits
 * cleared (+0.0).
 */
EXPORT CONST vdouble xexp2(vdouble d) { vdouble u = vrint_vd_vd(d), s; vint q = vrint_vi_vd(u); s = vsub_vd_vd_vd(d, u); #ifdef SPLIT_KERNEL vdouble s2 = vmul_vd_vd_vd(s, s), v; u = vcast_vd_d(+0.4434359082926529454e-9); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.1017819260921760451e-6)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.1525273353517584730e-4)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.1333355814670499073e-2)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.5550410866482046596e-1)); v = vcast_vd_d(+0.7073164598085707425e-8); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.1321543872511327615e-5)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.1540353045101147808e-3)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.9618129107597600536e-2)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.2402265069591012214e+0)); u = vmla_vd_vd_vd_vd(u, s, v); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6931471805599452862e+0)); #else u = vcast_vd_d(+0.4434359082926529454e-9); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.7073164598085707425e-8)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1017819260921760451e-6)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1321543872511327615e-5)); u = vmla_vd_vd_vd_vd(u, s, 
vcast_vd_d(+0.1525273353517584730e-4)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1540353045101147808e-3)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1333355814670499073e-2)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.9618129107597600536e-2)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5550410866482046596e-1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2402265069591012214e+0)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6931471805599452862e+0)); #endif /* xexp2: final step 1 + u * s, fused when ENABLE_FMA_DP is defined, via a pair product otherwise */ #ifdef ENABLE_FMA_DP u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(1)); #else u = ddnormalize_vd2_vd2(ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(u, s))).x; #endif /* xexp2: scale by q, then d >= 1024 gives +infinity and d < -2000 gives +0.0 */ u = vldexp2_vd_vd_vi(u, q); u = vsel_vd_vo_vd_vd(vge_vo_vd_vd(d, vcast_vd_d(1024)), vcast_vd_d(SLEEF_INFINITY), u); u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-2000)), vreinterpret_vm_vd(u))); return u; }
/*
 * xexp10: 10 raised to every lane of d.
 * u = rint(d * LOG10_2), q is its integer form, and the reduced argument is
 * s = d - u * L10U - u * L10L, formed in two multiply-add steps (assuming
 * vmla(a, b, c) = a * b + c). LOG10_2, L10U and L10L are defined outside
 * this view.
 * A polynomial in s is evaluated either as two chains in s * s that are
 * merged (SPLIT_KERNEL) or as a single chain in s; in both cases the last
 * coefficient is 2.302585092994045901.
 * The final step 1 + u * s uses vfma_vd_vd_vd_vd when ENABLE_FMA_DP is
 * defined, and otherwise a pair product and pair addition whose normalized
 * high half is taken. The value is scaled with vldexp2_vd_vd_vi(u, q).
 * Lanes with d > 308.25471555991671 return +infinity; lanes with d < -350
 * have all bits cleared (+0.0).
 */
EXPORT CONST vdouble xexp10(vdouble d) { vdouble u = vrint_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(LOG10_2))), s; vint q = vrint_vi_vd(u); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10U), d); s = vmla_vd_vd_vd_vd(u, vcast_vd_d(-L10L), s); #ifdef SPLIT_KERNEL vdouble s2 = vmul_vd_vd_vd(s, s), v; u = vcast_vd_d(+0.2411463498334267652e-3); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.5013975546789733659e-2)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.6808936399446784138e-1)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.5393829292058536229e+0)); u = vmla_vd_vd_vd_vd(u, s2, vcast_vd_d(+0.2034678592293432953e+1)); v = vcast_vd_d(+0.1157488415217187375e-2); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.1959762320720533080e-1)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.2069958494722676234e+0)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.1171255148908541655e+1)); v = vmla_vd_vd_vd_vd(v, s2, vcast_vd_d(+0.2650949055239205876e+1)); u = vmla_vd_vd_vd_vd(u, s, v); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2302585092994045901e+1)); #else u = vcast_vd_d(+0.2411463498334267652e-3); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1157488415217187375e-2)); u = vmla_vd_vd_vd_vd(u, s, 
vcast_vd_d(+0.5013975546789733659e-2)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1959762320720533080e-1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.6808936399446784138e-1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2069958494722676234e+0)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.5393829292058536229e+0)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.1171255148908541655e+1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2034678592293432953e+1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2650949055239205876e+1)); u = vmla_vd_vd_vd_vd(u, s, vcast_vd_d(+0.2302585092994045901e+1)); #endif /* xexp10: final step 1 + u * s, fused when ENABLE_FMA_DP is defined, via a pair product otherwise */ #ifdef ENABLE_FMA_DP u = vfma_vd_vd_vd_vd(u, s, vcast_vd_d(1)); #else u = ddnormalize_vd2_vd2(ddadd_vd2_vd_vd2(vcast_vd_d(1), ddmul_vd2_vd_vd(u, s))).x; #endif /* xexp10: scale by q, then d > 308.25471555991671 gives +infinity and d < -350 gives +0.0 */ u = vldexp2_vd_vd_vi(u, q); u = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(308.25471555991671)), vcast_vd_d(SLEEF_INFINITY), u); u = vreinterpret_vd_vm(vandnot_vm_vo64_vm(vlt_vo_vd_vd(d, vcast_vd_d(-350)), vreinterpret_vm_vd(u))); return u; }
/*
 * xexpm1: e^a - 1 for every lane of a.
 * expk2 is called on the pair (a, 0), -1.0 is added in pair arithmetic and
 * the two halves are summed. Fixups, in this order: a above
 * 709.782712893383996732223 gives +infinity; a below
 * -36.736800569677101399113302437 gives -1; a equal to -0.0 gives -0.0.
 * There is no explicit NaN select in this function.
 */
EXPORT CONST vdouble xexpm1(vdouble a) { vdouble2 d = ddadd2_vd2_vd2_vd(expk2(vcast_vd2_vd_vd(a, vcast_vd_d(0))), vcast_vd_d(-1.0)); vdouble x = vadd_vd_vd_vd(d.x, d.y); x = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(a, vcast_vd_d(709.782712893383996732223)), vcast_vd_d(SLEEF_INFINITY), x); x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(a, vcast_vd_d(-36.736800569677101399113302437)), vcast_vd_d(-1), x); x = vsel_vd_vo_vd_vd(visnegzero_vo_vd(a), vcast_vd_d(-0.0), x); return x; }
/*
 * xlog10: base-10 logarithm of every lane of d.
 * The exponent/mantissa split is the same in form as in xlog_u1: without
 * ENABLE_AVX512F lanes below DBL_MIN are pre-multiplied by 2^64 and 64 is
 * subtracted from e afterwards; with ENABLE_AVX512F vgetexp_vd_vd and
 * vgetmant_vd_vd are used, with e forced to 1024.0 where vgetexp returned
 * +infinity.
 * x = (m - 1) / (m + 1) is computed as a vdouble2 pair, x2 = x.x * x.x, and
 * a polynomial t in x2 is evaluated. The pair sum is
 *   (0.30102999566398119802, -2.803728127785170339e-18) * e
 *     + x * (0.86858896380650363334, 1.1430059694096389311e-17)
 *     + (x2 * x.x) * t
 * (the leading constants are approximately log10(2) and 2 / ln(10)), and the
 * return value is the sum of its halves.
 * Special cases without ENABLE_AVX512F, in this order: d == +inf gives
 * +inf; d < 0 or NaN gives NaN; d == 0 gives -inf. With ENABLE_AVX512F they
 * are delegated to vfixup_vd_vd_vd_vi2_i.
 */
EXPORT CONST vdouble xlog10(vdouble d) { vdouble2 x; vdouble t, m, x2; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(1LL << 32) * (double)(1LL << 32))), d); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), 
vcast_vd_d(1024.0), e); m = vgetmant_vd_vd(d); #endif x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); x2 = vmul_vd_vd_vd(x.x, x.x); t = vcast_vd_d(+0.6653725819576758460e-1); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.6625722782820833712e-1)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.7898105214313944078e-1)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.9650955035715275132e-1)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.1240841409721444993e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.1737177927454605086e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.2895296546021972617e+0)); #ifndef ENABLE_AVX512F vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.30102999566398119802, -2.803728127785170339e-18), vcast_vd_vi(e)); #else vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.30102999566398119802, -2.803728127785170339e-18), e); #endif s = ddadd_vd2_vd2_vd2(s, ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(0.86858896380650363334, 1.1430059694096389311e-17))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, x.x), t)); vdouble r = vadd_vd_vd_vd(s.x, s.y); #ifndef ENABLE_AVX512F r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r); #else r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); #endif return r; }
/*
 * xlog2: base-2 logarithm of every lane of d.
 * The exponent/mantissa split is the same in form as in xlog_u1: without
 * ENABLE_AVX512F lanes below DBL_MIN are pre-multiplied by 2^64 and 64 is
 * subtracted from e afterwards; with ENABLE_AVX512F vgetexp_vd_vd and
 * vgetmant_vd_vd are used, with e forced to 1024.0 where vgetexp returned
 * +infinity.
 * x = (m - 1) / (m + 1) is computed as a vdouble2 pair, x2 = x.x * x.x, and
 * a polynomial t in x2 is evaluated. The pair sum is
 *   e + x * (2.885390081777926774, 6.0561604995516736434e-18)
 *     + (x2 * x.x) * t
 * (the constant is approximately 2 / ln(2)), and the return value is the sum
 * of its halves.
 * Special cases without ENABLE_AVX512F, in this order: d == +inf gives
 * +inf; d < 0 or NaN gives NaN; d == 0 gives -inf. With ENABLE_AVX512F they
 * are delegated to vfixup_vd_vd_vd_vi2_i.
 */
EXPORT CONST vdouble xlog2(vdouble d) { vdouble2 x; vdouble t, m, x2; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vd_vd(d, vcast_vd_d(DBL_MIN)); d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d, vcast_vd_d((double)(1LL << 32) * (double)(1LL << 32))), d); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(d, vcast_vd_d(1.0/0.75))); m = vldexp3_vd_vd_vi(d, vneg_vi_vi(e)); e = vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(d, 
vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); m = vgetmant_vd_vd(d); #endif x = dddiv_vd2_vd2_vd2(ddadd2_vd2_vd_vd(vcast_vd_d(-1), m), ddadd2_vd2_vd_vd(vcast_vd_d(1), m)); x2 = vmul_vd_vd_vd(x.x, x.x); t = vcast_vd_d(+0.2211941750456081490e+0); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.2200768693152277689e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.2623708057488514656e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.3205977477944495502e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.4121985945485324709e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.5770780162997058982e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(+0.96179669392608091449 )); #ifndef ENABLE_AVX512F vdouble2 s = ddadd2_vd2_vd_vd2(vcast_vd_vi(e), ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(2.885390081777926774, 6.0561604995516736434e-18))); #else vdouble2 s = ddadd2_vd2_vd_vd2(e, ddmul_vd2_vd2_vd2(x, vcast_vd2_d_d(2.885390081777926774, 6.0561604995516736434e-18))); #endif s = ddadd2_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, x.x), t)); vdouble r = vadd_vd_vd_vd(s.x, s.y); #ifndef ENABLE_AVX512F r = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), r); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(0)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(-SLEEF_INFINITY), r); #else r = vfixup_vd_vd_vd_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); #endif return r; } EXPORT CONST vdouble xlog1p(vdouble d) { vdouble2 x; vdouble t, m, x2; vdouble dp1 = vadd_vd_vd_vd(d, vcast_vd_d(1)); #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vd_vd(dp1, vcast_vd_d(DBL_MIN)); dp1 = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(dp1, vcast_vd_d((double)(1LL << 32) * (double)(1LL << 32))), dp1); vint e = vilogb2k_vi_vd(vmul_vd_vd_vd(dp1, vcast_vd_d(1.0/0.75))); t = vldexp3_vd_vd_vi(vcast_vd_d(1), vneg_vi_vi(e)); m = vmla_vd_vd_vd_vd(d, t, vsub_vd_vd_vd(t, vcast_vd_d(1))); e = 
vsel_vi_vo_vi_vi(vcast_vo32_vo64(o), vsub_vi_vi_vi(e, vcast_vi_i(64)), e); vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), vcast_vd_vi(e)); #else vdouble e = vgetexp_vd_vd(vmul_vd_vd_vd(dp1, vcast_vd_d(1.0/0.75))); e = vsel_vd_vo_vd_vd(vispinf_vo_vd(e), vcast_vd_d(1024.0), e); t = vldexp3_vd_vd_vi(vcast_vd_d(1), vneg_vi_vi(vrint_vi_vd(e))); m = vmla_vd_vd_vd_vd(d, t, vsub_vd_vd_vd(t, vcast_vd_d(1))); vdouble2 s = ddmul_vd2_vd2_vd(vcast_vd2_d_d(0.693147180559945286226764, 2.319046813846299558417771e-17), e); #endif x = dddiv_vd2_vd2_vd2(vcast_vd2_vd_vd(m, vcast_vd_d(0)), ddadd_vd2_vd_vd(vcast_vd_d(2), m)); x2 = vmul_vd_vd_vd(x.x, x.x); t = vcast_vd_d(0.1532076988502701353e+0); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.1525629051003428716e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.1818605932937785996e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.2222214519839380009e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.2857142932794299317e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.3999999999635251990e+0)); t = vmla_vd_vd_vd_vd(t, x2, vcast_vd_d(0.6666666666667333541e+0)); s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, x.x), t)); vdouble r = vadd_vd_vd_vd(s.x, s.y); r = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(1e+307)), vcast_vd_d(SLEEF_INFINITY), r); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(-1)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(-1)), vcast_vd_d(-SLEEF_INFINITY), r); r = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), r); return r; } // static INLINE CONST vint2 vcast_vi2_i_i(int i0, int i1) { return vcast_vi2_vm(vcast_vm_i_i(i0, i1)); } static INLINE CONST vint2 vrev21_vi2_vi2(vint2 i) { return vreinterpret_vi2_vf(vrev21_vf_vf(vreinterpret_vf_vi2(i))); } EXPORT CONST vdouble xfabs(vdouble x) { return vabs_vd_vd(x); } EXPORT CONST vdouble 
xcopysign(vdouble x, vdouble y) { return vcopysign_vd_vd_vd(x, y); }

/*
 * Lane-wise maximum of x and y. Lanes in which y is NaN yield x.
 * x86 SIMD builds use vmax_vd_vd_vd for the remaining lanes; all other
 * builds pick x where x > y and y otherwise.
 */
EXPORT CONST vdouble xfmax(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  vdouble larger = vmax_vd_vd_vd(x, y);
#else
  vdouble larger = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(x, y), x, y);
#endif
  return vsel_vd_vo_vd_vd(yIsNan, x, larger);
}

/*
 * Lane-wise minimum of x and y. Lanes in which y is NaN yield x.
 * x86 SIMD builds use vmin_vd_vd_vd for the remaining lanes; all other
 * builds pick x where y > x and y otherwise.
 */
EXPORT CONST vdouble xfmin(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  vdouble smaller = vmin_vd_vd_vd(x, y);
#else
  vdouble smaller = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(y, x), x, y);
#endif
  return vsel_vd_vo_vd_vd(yIsNan, x, smaller);
}

/*
 * Positive difference: x - y, replaced by 0 in lanes where the difference
 * is negative or where x compares equal to y.
 */
EXPORT CONST vdouble xfdim(vdouble x, vdouble y) {
  vdouble diff = vsub_vd_vd_vd(x, y);
  vopmask clampToZero = vor_vo_vo_vo(vlt_vo_vd_vd(diff, vcast_vd_d(0)), veq_vo_vd_vd(x, y));
  return vsel_vd_vo_vd_vd(clampToZero, vcast_vd_d(0), diff);
}

/*
 * Round toward zero.
 * The fractional part is extracted in two steps: first the multiple of 2^31
 * contained in x is removed (presumably so the remainder fits the integer
 * conversion done by vtruncate_vi_vd), then the integer part of that
 * remainder is removed. Infinite lanes and lanes with |x| >= 2^52 are
 * returned unchanged; the sign of x is copied onto the result.
 */
EXPORT CONST vdouble xtrunc(vdouble x) {
  vdouble hi = vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (1LL << 31)))));
  vdouble frac = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(1LL << 31), hi));
  frac = vsub_vd_vd_vd(frac, vcast_vd_vi(vtruncate_vi_vd(frac)));

  vopmask passthrough = vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(1LL << 52)));
  return vsel_vd_vo_vd_vd(passthrough, x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, frac), x));
}

/*
 * Round toward negative infinity.
 * Same fraction extraction as xtrunc; a negative fraction is then shifted
 * up by 1 so that subtracting it from x moves the result downward.
 * Infinite lanes and lanes with |x| >= 2^52 are returned unchanged.
 */
EXPORT CONST vdouble xfloor(vdouble x) {
  vdouble hi = vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (1LL << 31)))));
  vdouble frac = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(1LL << 31), hi));
  frac = vsub_vd_vd_vd(frac, vcast_vd_vi(vtruncate_vi_vd(frac)));

  vopmask negFrac = vlt_vo_vd_vd(frac, vcast_vd_d(0));
  frac = vsel_vd_vo_vd_vd(negFrac, vadd_vd_vd_vd(frac, vcast_vd_d(1.0)), frac);

  vopmask passthrough = vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(1LL << 52)));
  return vsel_vd_vo_vd_vd(passthrough, x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, frac), x));
}

EXPORT CONST vdouble xceil(vdouble x) {
  vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(1LL << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 /
(1LL << 31))))))); fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); fr = vsel_vd_vo_vd_vd(vle_vo_vd_vd(fr, vcast_vd_d(0)), fr, vsub_vd_vd_vd(fr, vcast_vd_d(1.0))); return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(x), vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(1LL << 52))), x, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), x)); } EXPORT CONST vdouble xround(vdouble d) { vdouble x = vadd_vd_vd_vd(d, vcast_vd_d(0.5)); vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(1LL << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (1LL << 31))))))); fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); x = vsel_vd_vo_vd_vd(vand_vo_vo_vo(vle_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(fr, vcast_vd_d(0))), vsub_vd_vd_vd(x, vcast_vd_d(1.0)), x); fr = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(fr, vcast_vd_d(0)), vadd_vd_vd_vd(fr, vcast_vd_d(1.0)), fr); x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0.49999999999999994449)), vcast_vd_d(0), x); return vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(d), vge_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1LL << 52))), d, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), d)); } EXPORT CONST vdouble xrint(vdouble d) { vdouble x = vadd_vd_vd_vd(d, vcast_vd_d(0.5)); vdouble fr = vsub_vd_vd_vd(x, vmul_vd_vd_vd(vcast_vd_d(1LL << 31), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (1LL << 31))))))); vopmask isodd = vcast_vo64_vo32(veq_vo_vi_vi(vand_vi_vi_vi(vcast_vi_i(1), vtruncate_vi_vd(fr)), vcast_vi_i(1))); fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); fr = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(fr, vcast_vd_d(0)), vand_vo_vo_vo(veq_vo_vd_vd(fr, vcast_vd_d(0)), isodd)), vadd_vd_vd_vd(fr, vcast_vd_d(1.0)), fr); x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0.50000000000000011102)), vcast_vd_d(0), x); vdouble ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visinf_vo_vd(d), vge_vo_vd_vd(vabs_vd_vd(d), vcast_vd_d(1LL << 52))), d, vcopysign_vd_vd_vd(vsub_vd_vd_vd(x, fr), d)); return ret; } EXPORT CONST vdouble xnextafter(vdouble x, 
vdouble y) { x = vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vmulsign_vd_vd_vd(vcast_vd_d(0), y), x); vint2 t, xi2 = vreinterpret_vi2_vd(x); vopmask c = vxor_vo_vo_vo(vsignbit_vo_vd(x), vge_vo_vd_vd(y, x)); t = vadd_vi2_vi2_vi2(vxor_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0x7fffffff, 0xffffffff)), vcast_vi2_i_i(0, 1)); t = vadd_vi2_vi2_vi2(t, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, 1), veq_vi2_vi2_vi2(t, vcast_vi2_i_i(-1, 0))))); xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(c, vreinterpret_vd_vi2(t), vreinterpret_vd_vi2(xi2))); xi2 = vsub_vi2_vi2_vi2(xi2, vcast_vi2_vm(vand_vm_vo64_vm(vneq_vo_vd_vd(x, y), vcast_vm_i_i(0, 1)))); xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(vneq_vo_vd_vd(x, y), vreinterpret_vd_vi2(vadd_vi2_vi2_vi2(xi2, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, -1), veq_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0, -1)))))), vreinterpret_vd_vi2(xi2))); t = vadd_vi2_vi2_vi2(vxor_vi2_vi2_vi2(xi2, vcast_vi2_i_i(0x7fffffff, 0xffffffff)), vcast_vi2_i_i(0, 1)); t = vadd_vi2_vi2_vi2(t, vrev21_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i_i(0, 1), veq_vi2_vi2_vi2(t, vcast_vi2_i_i(-1, 0))))); xi2 = vreinterpret_vi2_vd(vsel_vd_vo_vd_vd(c, vreinterpret_vd_vi2(t), vreinterpret_vd_vi2(xi2))); vdouble ret = vreinterpret_vd_vi2(xi2); ret = vsel_vd_vo_vd_vd(vand_vo_vo_vo(veq_vo_vd_vd(ret, vcast_vd_d(0)), vneq_vo_vd_vd(x, vcast_vd_d(0))), vmulsign_vd_vd_vd(vcast_vd_d(0), x), ret); ret = vsel_vd_vo_vd_vd(vand_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(y, vcast_vd_d(0))), y, ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); return ret; } EXPORT CONST vdouble xfrfrexp(vdouble x) { x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(DBL_MIN)), vmul_vd_vd_vd(x, vcast_vd_d(1ULL << 63)), x); vmask xm = vreinterpret_vm_vd(x); xm = vand_vm_vm_vm(xm, vcast_vm_i_i(~0x7ff00000, ~0)); xm = vor_vm_vm_vm (xm, vcast_vm_i_i( 0x3fe00000, 0)); vdouble ret = vreinterpret_vd_vm(xm); ret = vsel_vd_vo_vd_vd(visinf_vo_vd(x), 
vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), x), ret); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), x, ret); return ret; } EXPORT CONST vint xexpfrexp(vdouble x) { x = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(DBL_MIN)), vmul_vd_vd_vd(x, vcast_vd_d(1ULL << 63)), x); vint ret = vcastu_vi_vi2(vreinterpret_vi2_vd(x)); ret = vsub_vi_vi_vi(vand_vi_vi_vi(vsrl_vi_vi_i(ret, 20), vcast_vi_i(0x7ff)), vcast_vi_i(0x3fe)); ret = vsel_vi_vo_vi_vi(vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), visnan_vo_vd(x)), visinf_vo_vd(x)), vcast_vi_i(0), ret); return ret; } EXPORT CONST vdouble xfma(vdouble x, vdouble y, vdouble z) { vdouble h2 = vadd_vd_vd_vd(vmul_vd_vd_vd(x, y), z), q = vcast_vd_d(1); vopmask o = vlt_vo_vd_vd(vabs_vd_vd(h2), vcast_vd_d(1e-300)); { const double c0 = 1ULL << 54, c1 = c0 * c0, c2 = c1 * c1; x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(c1)), x); y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(c1)), y); z = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(z, vcast_vd_d(c2)), z); q = vsel_vd_vo_vd_vd(o, vcast_vd_d(1.0 / c2), q); } o = vgt_vo_vd_vd(vabs_vd_vd(h2), vcast_vd_d(1e+300)); { const double c0 = 1ULL << 54, c1 = c0 * c0, c2 = c1 * c1; x = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(x, vcast_vd_d(1.0 / c1)), x); y = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(y, vcast_vd_d(1.0 / c1)), y); z = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(z, vcast_vd_d(1.0 / c2)), z); q = vsel_vd_vo_vd_vd(o, vcast_vd_d(c2), q); } vdouble2 d = ddmul_vd2_vd_vd(x, y); d = ddadd2_vd2_vd2_vd(d, z); vdouble ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(0)), veq_vo_vd_vd(y, vcast_vd_d(0))), z, vadd_vd_vd_vd(d.x, d.y)); o = visinf_vo_vd(z); o = vandnot_vo_vo_vo(visinf_vo_vd(x), o); o = vandnot_vo_vo_vo(visnan_vo_vd(x), o); o = vandnot_vo_vo_vo(visinf_vo_vd(y), o); o = vandnot_vo_vo_vo(visnan_vo_vd(y), o); h2 = vsel_vd_vo_vd_vd(o, z, h2); o = vor_vo_vo_vo(visinf_vo_vd(h2), visnan_vo_vd(h2)); return vsel_vd_vo_vd_vd(o, h2, vmul_vd_vd_vd(ret, q)); } SQRTU05_FUNCATR 
vdouble xsqrt_u05(vdouble d) {
  /*
   * Square root computed without a hardware sqrt:
   *  1. negative lanes become NaN;
   *  2. very small / very large inputs are rescaled and the matching
   *     correction factor (already multiplied by 0.5) is kept in `scale`;
   *  3. an integer bit trick gives a first estimate of 1/sqrt(d), refined
   *     by three iterations of est * (1.5 - 0.5 * d * est * est);
   *  4. a final correction in double-double arithmetic, (d + s*s) / s with
   *     s = est * d, is multiplied by `scale`.
   * +Inf and zero lanes are patched at the end.
   */
  vdouble scale;
  vopmask range;

  d = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(d, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), d);

  /* small inputs: scale up */
  range = vlt_vo_vd_vd(d, vcast_vd_d(8.636168555094445E-78));
  d = vsel_vd_vo_vd_vd(range, vmul_vd_vd_vd(d, vcast_vd_d(1.157920892373162E77)), d);
  scale = vsel_vd_vo_vd_vd(range, vcast_vd_d(2.9387358770557188E-39*0.5), vcast_vd_d(0.5));

  /* large inputs: scale down */
  range = vgt_vo_vd_vd(d, vcast_vd_d(1.3407807929942597e+154));
  d = vsel_vd_vo_vd_vd(range, vmul_vd_vd_vd(d, vcast_vd_d(7.4583407312002070e-155)), d);
  scale = vsel_vd_vo_vd_vd(range, vcast_vd_d(1.1579208923731620e+77*0.5), scale);

  /* initial estimate of 1/sqrt(d) from the bit pattern of d */
  vdouble est = vreinterpret_vd_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i_i(0x5fe6ec86, 0), vsrl_vi2_vi2_i(vreinterpret_vi2_vd(vadd_vd_vd_vd(d, vcast_vd_d(1e-320))), 1)));

  for (int step = 0; step < 3; step++) {
    vdouble halfDEst2 = vmul_vd_vd_vd(vmul_vd_vd_vd(vmul_vd_vd_vd(vcast_vd_d(0.5), d), est), est);
    est = vmul_vd_vd_vd(est, vsub_vd_vd_vd(vcast_vd_d(1.5), halfDEst2));
  }

  /* est now approximates sqrt(d) */
  est = vmul_vd_vd_vd(est, d);

  vdouble2 corr = ddmul_vd2_vd2_vd2(ddadd2_vd2_vd_vd2(d, ddmul_vd2_vd_vd(est, est)), ddrec_vd2_vd(est));

  vdouble root = vmul_vd_vd_vd(vadd_vd_vd_vd(corr.x, corr.y), scale);

  root = vsel_vd_vo_vd_vd(vispinf_vo_vd(d), vcast_vd_d(SLEEF_INFINITY), root);
  root = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(0)), d, root);

  return root;
}

/*
 * Square root entry point: the native vsqrt_vd_vd when ACCURATE_SQRT is
 * defined, the software xsqrt_u05 otherwise.
 */
EXPORT CONST vdouble xsqrt(vdouble d) {
#ifdef ACCURATE_SQRT
  return vsqrt_vd_vd(d);
#else
  // fall back to approximation if ACCURATE_SQRT is undefined
  return xsqrt_u05(d);
#endif
}

/* The 3.5-ULP variant simply forwards to the 0.5-ULP implementation. */
EXPORT CONST vdouble xsqrt_u35(vdouble d) { return xsqrt_u05(d); }

EXPORT CONST vdouble xhypot_u05(vdouble x, vdouble y) {
  x = vabs_vd_vd(x);
  y = vabs_vd_vd(y);
  vdouble min = vmin_vd_vd_vd(x, y), n = min;
  vdouble max = vmax_vd_vd_vd(x, y), d = max;

  vopmask o = vlt_vo_vd_vd(max, vcast_vd_d(DBL_MIN));
  n = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(n, vcast_vd_d(1ULL << 54)), n);
  d = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(d,
vcast_vd_d(1ULL << 54)), d); vdouble2 t = dddiv_vd2_vd2_vd2(vcast_vd2_vd_vd(n, vcast_vd_d(0)), vcast_vd2_vd_vd(d, vcast_vd_d(0))); t = ddmul_vd2_vd2_vd(ddsqrt_vd2_vd2(ddadd2_vd2_vd2_vd(ddsqu_vd2_vd2(t), vcast_vd_d(1))), max); vdouble ret = vadd_vd_vd_vd(t.x, t.y); ret = vsel_vd_vo_vd_vd(visnan_vo_vd(ret), vcast_vd_d(SLEEF_INFINITY), ret); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(min, vcast_vd_d(0)), max, ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(SLEEF_INFINITY)), veq_vo_vd_vd(y, vcast_vd_d(SLEEF_INFINITY))), vcast_vd_d(SLEEF_INFINITY), ret); return ret; } EXPORT CONST vdouble xhypot_u35(vdouble x, vdouble y) { x = vabs_vd_vd(x); y = vabs_vd_vd(y); vdouble min = vmin_vd_vd_vd(x, y); vdouble max = vmax_vd_vd_vd(x, y); vdouble t = vdiv_vd_vd_vd(min, max); vdouble ret = vmul_vd_vd_vd(max, vsqrt_vd_vd(vmla_vd_vd_vd_vd(t, t, vcast_vd_d(1)))); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(min, vcast_vd_d(0)), max, ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(visnan_vo_vd(x), visnan_vo_vd(y)), vcast_vd_d(SLEEF_NAN), ret); ret = vsel_vd_vo_vd_vd(vor_vo_vo_vo(veq_vo_vd_vd(x, vcast_vd_d(SLEEF_INFINITY)), veq_vo_vd_vd(y, vcast_vd_d(SLEEF_INFINITY))), vcast_vd_d(SLEEF_INFINITY), ret); return ret; } static INLINE CONST vdouble vtoward0(vdouble x) { // returns nextafter(x, 0) vdouble t = vreinterpret_vd_vm(vadd64_vm_vm_vm(vreinterpret_vm_vd(x), vcast_vm_i_i(-1, -1))); return vsel_vd_vo_vd_vd(veq_vo_vd_vd(x, vcast_vd_d(0)), vcast_vd_d(0), t); } static INLINE CONST vdouble vptrunc(vdouble x) { // round to integer toward 0, positive argument only #ifdef FULL_FP_ROUNDING return vtruncate_vd_vd(x); #else vdouble fr = vmla_vd_vd_vd_vd(vcast_vd_d(-(double)(1LL << 31)), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(x, vcast_vd_d(1.0 / (1LL << 31))))), x); fr = vsub_vd_vd_vd(fr, vcast_vd_vi(vtruncate_vi_vd(fr))); return vsel_vd_vo_vd_vd(vge_vo_vd_vd(vabs_vd_vd(x), vcast_vd_d(1LL << 
52)), x, vsub_vd_vd_vd(x, fr)); #endif } /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ EXPORT CONST vdouble xfmod(vdouble x, vdouble y) { vdouble nu = vabs_vd_vd(x), de = vabs_vd_vd(y), s = vcast_vd_d(1), q; vopmask o = vlt_vo_vd_vd(de, vcast_vd_d(DBL_MIN)); nu = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(nu, vcast_vd_d(1ULL << 54)), nu); de = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(de, vcast_vd_d(1ULL << 54)), de); s = vsel_vd_vo_vd_vd(o, vmul_vd_vd_vd(s , vcast_vd_d(1.0 / (1ULL << 54))), s); vdouble rde = vtoward0(vrec_vd_vd(de)); vdouble2 r = vcast_vd2_vd_vd(nu, vcast_vd_d(0)); for(int i=0;i<21;i++) { // ceil(log2(DBL_MAX) / 51) + 1 q = vsel_vd_vo_vd_vd(vand_vo_vo_vo(vgt_vo_vd_vd(vadd_vd_vd_vd(de, de), r.x), vge_vo_vd_vd(r.x, de)), vcast_vd_d(1), vmul_vd_vd_vd(vtoward0(r.x), rde)); q = vreinterpret_vd_vm(vand_vm_vm_vm(vreinterpret_vm_vd(vptrunc(q)), vcast_vm_i_i(0xffffffff, 0xfffffffe))); r = ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd2(r, ddmul_vd2_vd_vd(q, vneg_vd_vd(de)))); if (vtestallones_i_vo64(vlt_vo_vd_vd(r.x, de))) break; } vdouble ret = vmul_vd_vd_vd(r.x, s); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(vadd_vd_vd_vd(r.x, r.y), de), vcast_vd_d(0), ret); ret = vmulsign_vd_vd_vd(ret, x); ret = vsel_vd_vo_vd_vd(vlt_vo_vd_vd(nu, de), x, ret); ret = vsel_vd_vo_vd_vd(veq_vo_vd_vd(de, vcast_vd_d(0)), vcast_vd_d(SLEEF_NAN), ret); return ret; } #ifdef ENABLE_SVE typedef __sizeless_struct { vdouble2 a, b; } dd2; #else typedef struct { vdouble2 a, b; } dd2; #endif /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ static CONST dd2 gammak(vdouble a) { vdouble2 clc = vcast_vd2_d_d(0, 0), clln = vcast_vd2_d_d(1, 0), clld = vcast_vd2_d_d(1, 0); vdouble2 v = vcast_vd2_d_d(1, 0), x, y, z; vdouble t, u; vopmask otiny = vlt_vo_vd_vd(vabs_vd_vd(a), vcast_vd_d(1e-306)), oref = vlt_vo_vd_vd(a, vcast_vd_d(0.5)); x = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(0, 0), vsel_vd2_vo_vd2_vd2(oref, ddadd2_vd2_vd_vd(vcast_vd_d(1), vneg_vd_vd(a)), vcast_vd2_vd_vd(a, 
vcast_vd_d(0)))); vopmask o0 = vand_vo_vo_vo(vle_vo_vd_vd(vcast_vd_d(0.5), x.x), vle_vo_vd_vd(x.x, vcast_vd_d(1.1))); vopmask o2 = vle_vo_vd_vd(vcast_vd_d(2.3), x.x); y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(1)), x)); y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(2)), y)); y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(3)), y)); y = ddnormalize_vd2_vd2(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(4)), y)); vopmask o = vand_vo_vo_vo(o2, vle_vo_vd_vd(x.x, vcast_vd_d(7))); clln = vsel_vd2_vo_vd2_vd2(o, y, clln); x = vsel_vd2_vo_vd2_vd2(o, ddadd2_vd2_vd2_vd(x, vcast_vd_d(5)), x); t = vsel_vd_vo_vd_vd(o2, vrec_vd_vd(x.x), ddnormalize_vd2_vd2(ddadd2_vd2_vd2_vd(x, vsel_vd_vo_d_d(o0, -1, -2))).x); u = vsel_vd_vo_vo_d_d_d(o2, o0, -156.801412704022726379848862, +0.2947916772827614196e+2, +0.7074816000864609279e-7); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +1.120804464289911606838558160000, +0.1281459691827820109e+3, +0.4009244333008730443e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +13.39798545514258921833306020000, +0.2617544025784515043e+3, +0.1040114641628246946e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.116546276599463200848033357000, +0.3287022855685790432e+3, +0.1508349150733329167e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -1.391801093265337481495562410000, +0.2818145867730348186e+3, +0.1288143074933901020e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.015056113040026424412918973400, +0.1728670414673559605e+3, +0.4744167749884993937e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.179540117061234856098844714000, +0.7748735764030416817e+2, -0.6554816306542489902e-7)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.002481743600264997730942489280, +0.2512856643080930752e+2, -0.3189252471452599844e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, 
-0.029527880945699120504851034100, +0.5766792106140076868e+1, +0.1358883821470355377e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000540164767892604515196325186, +0.7270275473996180571e+0, -0.4343931277157336040e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.006403362833808069794787256200, +0.8396709124579147809e-1, +0.9724785897406779555e-6)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000162516262783915816896611252, -0.8211558669746804595e-1, -0.2036886057225966011e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.001914438498565477526465972390, +0.6828831828341884458e-1, +0.4373363141819725815e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +7.20489541602001055898311517e-05, -0.7712481339961671511e-1, -0.9439951268304008677e-5)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000839498720672087279971000786, +0.8337492023017314957e-1, +0.2050727030376389804e-4)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -5.17179090826059219329394422e-05, -0.9094964931456242518e-1, -0.4492620183431184018e-4)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000592166437353693882857342347, +0.1000996313575929358e+0, +0.9945751236071875931e-4)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +6.97281375836585777403743539e-05, -0.1113342861544207724e+0, -0.2231547599034983196e-3)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.000784039221720066627493314301, +0.1255096673213020875e+0, +0.5096695247101967622e-3)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.000229472093621399176949318732, -0.1440498967843054368e+0, -0.1192753911667886971e-2)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, -0.002681327160493827160473958490, +0.1695571770041949811e+0, +0.2890510330742210310e-2)); u = vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.003472222222222222222175164840, -0.2073855510284092762e+0, -0.7385551028674461858e-2)); u = 
vmla_vd_vd_vd_vd(u, t, vsel_vd_vo_vo_d_d_d(o2, o0, +0.083333333333333333335592087900, +0.2705808084277815939e+0, +0.2058080842778455335e-1)); y = ddmul_vd2_vd2_vd2(ddadd2_vd2_vd2_vd(x, vcast_vd_d(-0.5)), logk2(x)); y = ddadd2_vd2_vd2_vd2(y, ddneg_vd2_vd2(x)); y = ddadd2_vd2_vd2_vd2(y, vcast_vd2_d_d(0.91893853320467278056, -3.8782941580672414498e-17)); // 0.5*log(2*M_PI) z = ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd (u, t), vsel_vd_vo_d_d(o0, -0.4006856343865314862e+0, -0.6735230105319810201e-1)); z = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(z, t), vsel_vd_vo_d_d(o0, +0.8224670334241132030e+0, +0.3224670334241132030e+0)); z = ddadd2_vd2_vd2_vd(ddmul_vd2_vd2_vd(z, t), vsel_vd_vo_d_d(o0, -0.5772156649015328655e+0, +0.4227843350984671345e+0)); z = ddmul_vd2_vd2_vd(z, t); clc = vsel_vd2_vo_vd2_vd2(o2, y, z); clld = vsel_vd2_vo_vd2_vd2(o2, ddadd2_vd2_vd2_vd(ddmul_vd2_vd_vd(u, t), vcast_vd_d(1)), clld); y = clln; clc = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(83.1776616671934334590333, 3.67103459631568507221878e-15), // log(2^120) vsel_vd2_vo_vd2_vd2(oref, ddadd2_vd2_vd2_vd2(vcast_vd2_d_d(1.1447298858494001639, 1.026595116270782638e-17), ddneg_vd2_vd2(clc)), clc)); // log(M_PI) clln = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_d_d(1, 0), vsel_vd2_vo_vd2_vd2(oref, clln, clld)); if (!vtestallones_i_vo64(vnot_vo64_vo64(oref))) { t = vsub_vd_vd_vd(a, vmul_vd_vd_vd(vcast_vd_d(1LL << 28), vcast_vd_vi(vtruncate_vi_vd(vmul_vd_vd_vd(a, vcast_vd_d(1.0 / (1LL << 28))))))); x = ddmul_vd2_vd2_vd2(clld, sinpik(t)); } clld = vsel_vd2_vo_vd2_vd2(otiny, vcast_vd2_vd_vd(vmul_vd_vd_vd(a, vcast_vd_d((1LL << 60)*(double)(1LL << 60))), vcast_vd_d(0)), vsel_vd2_vo_vd2_vd2(oref, x, y)); dd2 ret = { clc, dddiv_vd2_vd2_vd2(clln, clld) }; return ret; } EXPORT CONST vdouble xtgamma_u1(vdouble a) { dd2 d = gammak(a); vdouble2 y = ddmul_vd2_vd2_vd2(expk2(d.a), d.b); vdouble r = vadd_vd_vd_vd(y.x, y.y); vopmask o; o = vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(-SLEEF_INFINITY)), vand_vo_vo_vo(vlt_vo_vd_vd(a, 
vcast_vd_d(0)), visint_vo_vd(a))), vand_vo_vo_vo(vand_vo_vo_vo(visnumber_vo_vd(a), vlt_vo_vd_vd(a, vcast_vd_d(0))), visnan_vo_vd(r))); r = vsel_vd_vo_vd_vd(o, vcast_vd_d(SLEEF_NAN), r); o = vand_vo_vo_vo(vand_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(SLEEF_INFINITY)), visnumber_vo_vd(a)), vge_vo_vd_vd(a, vcast_vd_d(-DBL_MIN))), vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vd_vd(a, vcast_vd_d(0)), vgt_vo_vd_vd(a, vcast_vd_d(200))), visnan_vo_vd(r))); r = vsel_vd_vo_vd_vd(o, vmulsign_vd_vd_vd(vcast_vd_d(SLEEF_INFINITY), a), r); return r; } EXPORT CONST vdouble xlgamma_u1(vdouble a) { dd2 d = gammak(a); vdouble2 y = ddadd2_vd2_vd2_vd2(d.a, logk2(ddabs_vd2_vd2(d.b))); vdouble r = vadd_vd_vd_vd(y.x, y.y); vopmask o; o = vor_vo_vo_vo(visinf_vo_vd(a), vor_vo_vo_vo(vand_vo_vo_vo(vle_vo_vd_vd(a, vcast_vd_d(0)), visint_vo_vd(a)), vand_vo_vo_vo(visnumber_vo_vd(a), visnan_vo_vd(r)))); r = vsel_vd_vo_vd_vd(o, vcast_vd_d(SLEEF_INFINITY), r); return r; } /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ EXPORT CONST vdouble xerf_u1(vdouble a) { vdouble s = a, t, u; vdouble2 d; a = vabs_vd_vd(a); vopmask o0 = vlt_vo_vd_vd(a, vcast_vd_d(1.0)); vopmask o1 = vlt_vo_vd_vd(a, vcast_vd_d(3.7)); vopmask o2 = vlt_vo_vd_vd(a, vcast_vd_d(6.0)); u = vsel_vd_vo_vd_vd(o0, vmul_vd_vd_vd(a, a), a); t = vsel_vd_vo_vo_d_d_d(o0, o1, +0.6801072401395392157e-20, +0.2830954522087717660e-13, -0.5846750404269610493e-17); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2161766247570056391e-18, -0.1509491946179481940e-11, +0.6076691048812607898e-15)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.4695919173301598752e-17, +0.3827857177807173152e-10, -0.3007518609604893831e-13)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.9049140419888010819e-16, -0.6139733921558987241e-09, +0.9427906260824646063e-12)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1634018903557411517e-14, +0.6985387934608038824e-08, -0.2100110908269393629e-10)); t = 
vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2783485786333455216e-13, -0.5988224513034371474e-07, +0.3534639523461223473e-09)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.4463221276786412722e-12, +0.4005716952355346640e-06, -0.4664967728285395926e-08)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.6711366622850138987e-11, -0.2132190104575784400e-05, +0.4943823283769000532e-07)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.9422759050232658346e-10, +0.9092461304042630325e-05, -0.4271203394761148254e-06)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1229055530100228477e-08, -0.3079188080966205457e-04, +0.3034067677404915895e-05)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1480719281585085023e-07, +0.7971413443082370762e-04, -0.1776295289066871135e-04)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1636584469123402714e-06, -0.1387853215225442864e-03, +0.8524547630559505050e-04)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1646211436588923363e-05, +0.6469678026257590965e-04, -0.3290582944961784398e-03)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.1492565035840624866e-04, +0.4996645280372945860e-03, +0.9696966068789101157e-03)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1205533298178966496e-03, -0.1622802482842520535e-02, -0.1812527628046986137e-02)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.8548327023450851166e-03, +0.1615320557049377171e-03, -0.4725409828123619017e-03)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.5223977625442188799e-02, +0.1915262325574875607e-01, +0.2090315427924229266e-01)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, -0.2686617064513125569e-01, -0.1027818298486033455e+00, -0.1052041921842776645e+00)); t = vmla_vd_vd_vd_vd(t, u, vsel_vd_vo_vo_d_d_d(o0, o1, +0.1128379167095512753e+00, -0.6366172819842503827e+00, -0.6345351808766568347e+00)); t = vmla_vd_vd_vd_vd(t, u, 
vsel_vd_vo_vo_d_d_d(o0, o1, -0.3761263890318375380e+00, -0.1128379590648910469e+01, -0.1129442929103524396e+01)); d = ddmul_vd2_vd_vd(t, u); d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_d_d_d(o0, o1, 1.1283791670955125586, 3.4110644736196137587e-08, 0.00024963035690526438285), vsel_vd_vo_vo_d_d_d(o0, o1, 1.5335459613165822674e-17, -2.4875650708323294246e-24, -5.4362665034856259795e-21))); d = vsel_vd2_vo_vd2_vd2(o0, ddmul_vd2_vd2_vd(d, a), ddadd_vd2_vd_vd2(vcast_vd_d(1.0), ddneg_vd2_vd2(expk2(d)))); u = vmulsign_vd_vd_vd(vsel_vd_vo_vd_vd(o2, vadd_vd_vd_vd(d.x, d.y), vcast_vd_d(1)), s); u = vsel_vd_vo_vd_vd(visnan_vo_vd(a), vcast_vd_d(SLEEF_NAN), u); return u; } /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ EXPORT CONST vdouble xerfc_u15(vdouble a) { vdouble s = a, r = vcast_vd_d(0), t; vdouble2 u, d, x; a = vabs_vd_vd(a); vopmask o0 = vlt_vo_vd_vd(a, vcast_vd_d(1.0)); vopmask o1 = vlt_vo_vd_vd(a, vcast_vd_d(2.2)); vopmask o2 = vlt_vo_vd_vd(a, vcast_vd_d(4.2)); vopmask o3 = vlt_vo_vd_vd(a, vcast_vd_d(27.3)); u = vsel_vd2_vo_vd2_vd2(o0, ddmul_vd2_vd_vd(a, a), vsel_vd2_vo_vd2_vd2(o1, vcast_vd2_vd_vd(a, vcast_vd_d(0)), dddiv_vd2_vd2_vd2(vcast_vd2_d_d(1, 0), vcast_vd2_vd_vd(a, vcast_vd_d(0))))); t = vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.6801072401395386139e-20, +0.3438010341362585303e-12, -0.5757819536420710449e+2, +0.2334249729638701319e+5); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2161766247570055669e-18, -0.1237021188160598264e-10, +0.4669289654498104483e+3, -0.4695661044933107769e+5)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.4695919173301595670e-17, +0.2117985839877627852e-09, -0.1796329879461355858e+4, +0.3173403108748643353e+5)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.9049140419888007122e-16, -0.2290560929177369506e-08, +0.4355892193699575728e+4, +0.3242982786959573787e+4)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 
+0.1634018903557410728e-14, +0.1748931621698149538e-07, -0.7456258884965764992e+4, -0.2014717999760347811e+5)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2783485786333451745e-13, -0.9956602606623249195e-07, +0.9553977358167021521e+4, +0.1554006970967118286e+5)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.4463221276786415752e-12, +0.4330010240640327080e-06, -0.9470019905444229153e+4, -0.6150874190563554293e+4)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.6711366622850136563e-11, -0.1435050600991763331e-05, +0.7387344321849855078e+4, +0.1240047765634815732e+4)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.9422759050232662223e-10, +0.3460139479650695662e-05, -0.4557713054166382790e+4, -0.8210325475752699731e+2)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1229055530100229098e-08, -0.4988908180632898173e-05, +0.2207866967354055305e+4, +0.3242443880839930870e+2)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1480719281585086512e-07, -0.1308775976326352012e-05, -0.8217975658621754746e+3, -0.2923418863833160586e+2)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1636584469123399803e-06, +0.2825086540850310103e-04, +0.2268659483507917400e+3, +0.3457461732814383071e+0)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1646211436588923575e-05, -0.6393913713069986071e-04, -0.4633361260318560682e+2, +0.5489730155952392998e+1)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.1492565035840623511e-04, -0.2566436514695078926e-04, +0.9557380123733945965e+1, +0.1559934132251294134e-2)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.1205533298178967851e-03, +0.5895792375659440364e-03, -0.2958429331939661289e+1, -0.1541741566831520638e+1)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.8548327023450850081e-03, 
-0.1695715579163588598e-02, +0.1670329508092765480e+0, +0.2823152230558364186e-5)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, +0.5223977625442187932e-02, +0.2089116434918055149e-03, +0.6096615680115419211e+0, +0.6249999184195342838e+0)); t = vmla_vd_vd_vd_vd(t, u.x, vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.2686617064513125222e-01, +0.1912855949584917753e-01, +0.1059212443193543585e-2, +0.1741749416408701288e-8)); d = ddmul_vd2_vd2_vd(u, t); d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 0.11283791670955126141, -0.10277263343147646779, -0.50005180473999022439, -0.5000000000258444377), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -4.0175691625932118483e-18, -6.2338714083404900225e-18, 2.6362140569041995803e-17, -4.0074044712386992281e-17))); d = ddmul_vd2_vd2_vd2(d, u); d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.37612638903183753802, -0.63661976742916359662, 1.601106273924963368e-06, 2.3761973137523364792e-13), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.3391897206042552387e-17, 7.6321019159085724662e-18, 1.1974001857764476775e-23, -1.1670076950531026582e-29))); d = ddmul_vd2_vd2_vd2(d, u); d = ddadd2_vd2_vd2_vd2(d, vcast_vd2_vd_vd(vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.1283791670955125586, -1.1283791674717296161, -0.57236496645145429341, -0.57236494292470108114), vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o2, 1.5335459613165822674e-17, 8.0896847755965377194e-17, 3.0704553245872027258e-17, -2.3984352208056898003e-17))); x = ddmul_vd2_vd2_vd(vsel_vd2_vo_vd2_vd2(o1, d, vcast_vd2_vd_vd(vneg_vd_vd(a), vcast_vd_d(0))), a); x = vsel_vd2_vo_vd2_vd2(o1, x, ddadd2_vd2_vd2_vd2(x, d)); x = vsel_vd2_vo_vd2_vd2(o0, ddsub_vd2_vd2_vd2(vcast_vd2_d_d(1, 0), x), expk2(x)); x = vsel_vd2_vo_vd2_vd2(o1, x, ddmul_vd2_vd2_vd2(x, u)); r = vsel_vd_vo_vd_vd(o3, vadd_vd_vd_vd(x.x, x.y), vcast_vd_d(0)); r = vsel_vd_vo_vd_vd(vsignbit_vo_vd(s), vsub_vd_vd_vd(vcast_vd_d(2), r), r); r = vsel_vd_vo_vd_vd(visnan_vo_vd(s), 
vcast_vd_d(SLEEF_NAN), r); return r; } #ifndef ENABLE_GNUABI EXPORT CONST int xgetInt(int name) { if (1 <= name && name <= 10) return vavailability_i(name); return 0; } EXPORT CONST void *xgetPtr(int name) { if (name == 0) return ISANAME; return (void *)0; } #endif #ifdef ALIAS_NO_EXT_SUFFIX #include ALIAS_NO_EXT_SUFFIX #endif #ifdef ENABLE_MAIN // gcc -DENABLE_MAIN -Wno-attributes -I../common -I../arch -DENABLE_AVX2 -mavx2 -mfma sleefsimddp.c ../common/common.c -lm #include #include int main(int argc, char **argv) { vdouble d1 = vcast_vd_d(atof(argv[1])); vdouble d2 = vcast_vd_d(atof(argv[2])); //vdouble d3 = vcast_vd_d(atof(argv[3])); //vdouble r = xnextafter(d1, d2); //int i; //double fr = frexp(atof(argv[1]), &i); //printf("%.20g\n", xfma(d1, d2, d3)[0]);; //printf("test %.20g\n", xtgamma_u1(d1)[0]); //printf("corr %.20g\n", tgamma(d1[0])); //printf("test %.20g\n", xerf_u1(d1)[0]); //printf("corr %.20g\n", erf(d1[0])); //printf("test %.20g\n", xerfc_u15(d1)[0]); //printf("corr %.20g\n", erfc(d1[0])); //printf("%.20g\n", nextafter(d1[0], d2[0]));; //printf("%.20g\n", vcast_d_vd(xhypot_u05(d1, d2))); //printf("%.20g\n", fr); printf("%.20g\n", fmod(atof(argv[1]), atof(argv[2]))); printf("%.20g\n", xfmod(d1, d2)[0]); //vdouble2 r = xsincospi_u35(a); //printf("%g, %g\n", vcast_d_vd(r.x), vcast_d_vd(r.y)); } #endif #ifdef ENABLE_GNUABI /* "finite" aliases for compatibility with GLIBC */ EXPORT CONST vdouble __acos_finite (vdouble) __attribute__((weak, alias(str_xacos ))); EXPORT CONST vdouble __acosh_finite (vdouble) __attribute__((weak, alias(str_xacosh ))); EXPORT CONST vdouble __asin_finite (double) __attribute__((weak, alias(str_xasin_u1 ))); EXPORT CONST vdouble __atan2_finite (vdouble, vdouble) __attribute__((weak, alias(str_xatan2_u1 ))); EXPORT CONST vdouble __atanh_finite (vdouble) __attribute__((weak, alias(str_xatanh ))); EXPORT CONST vdouble __cosh_finite (vdouble) __attribute__((weak, alias(str_xcosh ))); EXPORT CONST vdouble __exp10_finite (vdouble) 
__attribute__((weak, alias(str_xexp10 ))); EXPORT CONST vdouble __exp2_finite (vdouble) __attribute__((weak, alias(str_xexp2 ))); EXPORT CONST vdouble __exp_finite (vdouble) __attribute__((weak, alias(str_xexp ))); EXPORT CONST vdouble __fmod_finite (vdouble, vdouble) __attribute__((weak, alias(str_xfmod ))); EXPORT CONST vdouble __modf_finite (vdouble, vdouble *) __attribute__((weak, alias(str_xmodf ))); EXPORT CONST vdouble __hypot_u05_finite(vdouble, vdouble) __attribute__((weak, alias(str_xhypot_u05))); EXPORT CONST vdouble __lgamma_u1_finite(vdouble) __attribute__((weak, alias(str_xlgamma_u1))); EXPORT CONST vdouble __log10_finite (vdouble) __attribute__((weak, alias(str_xlog10 ))); EXPORT CONST vdouble __log_finite (vdouble) __attribute__((weak, alias(str_xlog_u1 ))); EXPORT CONST vdouble __pow_finite (vdouble, vdouble) __attribute__((weak, alias(str_xpow ))); EXPORT CONST vdouble __sinh_finite (vdouble) __attribute__((weak, alias(str_xsinh ))); EXPORT CONST vdouble __sqrt_finite (vdouble) __attribute__((weak, alias(str_xsqrt ))); EXPORT CONST vdouble __tgamma_u1_finite(vdouble) __attribute__((weak, alias(str_xtgamma_u1))); #ifdef HEADER_MASKED #include HEADER_MASKED #endif #endif /* #ifdef ENABLE_GNUABI */ sleef-3.3.1/src/libm/sleefsimdsp.c000066400000000000000000003066261333715643700170410ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. // (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. 
#include #include #include #include #include "misc.h" extern const float rempitabsp[]; #define __SLEEFSIMDSP_C__ #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif #ifdef ENABLE_SSE2 #define CONFIG 2 #include "helpersse2.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesse2_gnuabi.h" #else #include "renamesse2.h" #endif #endif #endif #ifdef ENABLE_SSE4 #define CONFIG 4 #include "helpersse2.h" #ifdef DORENAME #include "renamesse4.h" #endif #endif #ifdef ENABLE_AVX #define CONFIG 1 #include "helperavx.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx_gnuabi.h" #else #include "renameavx.h" #endif #endif #endif #ifdef ENABLE_FMA4 #define CONFIG 4 #include "helperavx.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamefma4_gnuabi.h" #else #include "renamefma4.h" #endif #endif #endif #ifdef ENABLE_AVX2 #define CONFIG 1 #include "helperavx2.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx2_gnuabi.h" #else #include "renameavx2.h" #endif #endif #endif #ifdef ENABLE_AVX2128 #define CONFIG 1 #include "helperavx2_128.h" #ifdef DORENAME #include "renameavx2128.h" #endif #endif #ifdef ENABLE_AVX512F #define CONFIG 1 #include "helperavx512f.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameavx512f_gnuabi.h" #else #include "renameavx512f.h" #endif #endif #endif #ifdef ENABLE_ADVSIMD #define CONFIG 1 #include "helperadvsimd.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renameadvsimd_gnuabi.h" #else #include "renameadvsimd.h" #endif #endif #endif #ifdef ENABLE_NEON32 #define CONFIG 1 #include "helperneon32.h" #ifdef DORENAME #include "renameneon32.h" #endif #endif #ifdef ENABLE_VSX #define CONFIG 1 #include "helperpower_128.h" #ifdef DORENAME #include "renamevsx.h" #endif #endif // #ifdef ENABLE_VECEXT #define CONFIG 1 #include "helpervecext.h" #ifdef DORENAME #include "renamevecext.h" #endif #endif #ifdef ENABLE_PUREC #define CONFIG 1 #include "helperpurec.h" #ifdef DORENAME #include "renamepurec.h" #endif #endif // #ifdef ENABLE_SVE 
#define CONFIG 1 #include "helpersve.h" #ifdef DORENAME #ifdef ENABLE_GNUABI #include "renamesve_gnuabi.h" #else #include "renamesve.h" #endif /* ENABLE_GNUABI */ #endif /* DORENAME */ #endif /* ENABLE_SVE */ // #include "df.h" static INLINE CONST vopmask visnegzero_vo_vf(vfloat d) { return veq_vo_vi2_vi2(vreinterpret_vi2_vf(d), vreinterpret_vi2_vf(vcast_vf_f(-0.0))); } static INLINE vopmask vnot_vo32_vo32(vopmask x) { return vxor_vo_vo_vo(x, veq_vo_vi2_vi2(vcast_vi2_i(0), vcast_vi2_i(0))); } static INLINE CONST vmask vsignbit_vm_vf(vfloat f) { return vand_vm_vm_vm(vreinterpret_vm_vf(f), vreinterpret_vm_vf(vcast_vf_f(-0.0f))); } static INLINE CONST vfloat vmulsign_vf_vf_vf(vfloat x, vfloat y) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x), vsignbit_vm_vf(y))); } static INLINE CONST vfloat vcopysign_vf_vf_vf(vfloat x, vfloat y) { return vreinterpret_vf_vm(vxor_vm_vm_vm(vandnot_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(x)), vand_vm_vm_vm (vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(y)))); } static INLINE CONST vfloat vsign_vf_vf(vfloat f) { return vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(1.0f)), vand_vm_vm_vm(vreinterpret_vm_vf(vcast_vf_f(-0.0f)), vreinterpret_vm_vf(f)))); } static INLINE CONST vopmask vsignbit_vo_vf(vfloat d) { return veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vcast_vi2_i(0x80000000)), vcast_vi2_i(0x80000000)); } static INLINE CONST vint2 vsel_vi2_vf_vf_vi2_vi2(vfloat f0, vfloat f1, vint2 x, vint2 y) { return vsel_vi2_vo_vi2_vi2(vlt_vo_vf_vf(f0, f1), x, y); } static INLINE CONST vint2 vsel_vi2_vf_vi2(vfloat d, vint2 x) { return vand_vi2_vo_vi2(vsignbit_vo_vf(d), x); } static INLINE CONST vopmask visint_vo_vf(vfloat y) { return veq_vo_vf_vf(vtruncate_vf_vf(y), y); } static INLINE CONST vopmask visnumber_vo_vf(vfloat x) { return vnot_vo32_vo32(vor_vo_vo_vo(visinf_vo_vf(x), visnan_vo_vf(x))); } #ifndef ENABLE_AVX512F static INLINE CONST vint2 vilogbk_vi2_vf(vfloat 
d) { vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(5.421010862427522E-20f)); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(vcast_vf_f(1.8446744073709552E19f), d), d); vint2 q = vand_vi2_vi2_vi2(vsrl_vi2_vi2_i(vcast_vi2_vm(vreinterpret_vm_vf(d)), 23), vcast_vi2_i(0xff)); q = vsub_vi2_vi2_vi2(q, vsel_vi2_vo_vi2_vi2(o, vcast_vi2_i(64 + 0x7f), vcast_vi2_i(0x7f))); return q; } static INLINE CONST vint2 vilogb2k_vi2_vf(vfloat d) { vint2 q = vreinterpret_vi2_vf(d); q = vsrl_vi2_vi2_i(q, 23); q = vand_vi2_vi2_vi2(q, vcast_vi2_i(0xff)); q = vsub_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)); return q; } #endif // EXPORT CONST vint2 xilogbf(vfloat d) { vint2 e = vilogbk_vi2_vf(vabs_vf_vf(d)); e = vsel_vi2_vo_vi2_vi2(veq_vo_vf_vf(d, vcast_vf_f(0.0f)), vcast_vi2_i(SLEEF_FP_ILOGB0), e); e = vsel_vi2_vo_vi2_vi2(visnan_vo_vf(d), vcast_vi2_i(SLEEF_FP_ILOGBNAN), e); e = vsel_vi2_vo_vi2_vi2(visinf_vo_vf(d), vcast_vi2_i(INT_MAX), e); return e; } static INLINE CONST vfloat vpow2i_vf_vi2(vint2 q) { return vreinterpret_vf_vm(vcast_vm_vi2(vsll_vi2_vi2_i(vadd_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)), 23))); } static INLINE CONST vfloat vldexp_vf_vf_vi2(vfloat x, vint2 q) { vfloat u; vint2 m = vsra_vi2_vi2_i(q, 31); m = vsll_vi2_vi2_i(vsub_vi2_vi2_vi2(vsra_vi2_vi2_i(vadd_vi2_vi2_vi2(m, q), 6), m), 4); q = vsub_vi2_vi2_vi2(q, vsll_vi2_vi2_i(m, 2)); m = vadd_vi2_vi2_vi2(m, vcast_vi2_i(0x7f)); m = vand_vi2_vi2_vi2(vgt_vi2_vi2_vi2(m, vcast_vi2_i(0)), m); vint2 n = vgt_vi2_vi2_vi2(m, vcast_vi2_i(0xff)); m = vor_vi2_vi2_vi2(vandnot_vi2_vi2_vi2(n, m), vand_vi2_vi2_vi2(n, vcast_vi2_i(0xff))); u = vreinterpret_vf_vm(vcast_vm_vi2(vsll_vi2_vi2_i(m, 23))); x = vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(x, u), u), u), u); u = vreinterpret_vf_vm(vcast_vm_vi2(vsll_vi2_vi2_i(vadd_vi2_vi2_vi2(q, vcast_vi2_i(0x7f)), 23))); return vmul_vf_vf_vf(x, u); } static INLINE CONST vfloat vldexp2_vf_vf_vi2(vfloat d, vint2 e) { return vmul_vf_vf_vf(vmul_vf_vf_vf(d, vpow2i_vf_vi2(vsra_vi2_vi2_i(e, 1))), vpow2i_vf_vi2(vsub_vi2_vi2_vi2(e, 
vsra_vi2_vi2_i(e, 1)))); } static INLINE CONST vfloat vldexp3_vf_vf_vi2(vfloat d, vint2 q) { return vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vreinterpret_vi2_vf(d), vsll_vi2_vi2_i(q, 23))); } EXPORT CONST vfloat xldexpf(vfloat x, vint2 q) { return vldexp_vf_vf_vi2(x, q); } #ifdef ENABLE_SVE typedef __sizeless_struct { vfloat d; vint2 i; } fi_t; typedef __sizeless_struct { vfloat2 df; vint2 i; } dfi_t; #else typedef struct { vfloat d; vint2 i; } fi_t; typedef struct { vfloat2 df; vint2 i; } dfi_t; #endif static INLINE CONST fi_t rempisubf(vfloat x) { #ifdef FULL_FP_ROUNDING vfloat y = vrint_vf_vf(vmul_vf_vf_vf(x, vcast_vf_f(4))); vint2 vi = vtruncate_vi2_vf(vsub_vf_vf_vf(y, vmul_vf_vf_vf(vrint_vf_vf(x), vcast_vf_f(4)))); fi_t ret = { vsub_vf_vf_vf(x, vmul_vf_vf_vf(y, vcast_vf_f(0.25))), vi }; #else vfloat fr = vsub_vf_vf_vf(x, vmul_vf_vf_vf(vcast_vf_f(1LL << 10), vtruncate_vf_vf(vmul_vf_vf_vf(x, vcast_vf_f(1.0 / (1LL << 10)))))); vint2 vi = vadd_vi2_vi2_vi2(vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(x, vcast_vf_f(0)), vcast_vi2_i(4), vcast_vi2_i(3)), vtruncate_vi2_vf(vmul_vf_vf_vf(fr, vcast_vf_f(8)))); vi = vsra_vi2_vi2_i(vsub_vi2_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i(7), vi), vcast_vi2_i(3)), 1); fr = vsub_vf_vf_vf(fr, vmul_vf_vf_vf(vcast_vf_f(0.25), vtruncate_vf_vf(vmla_vf_vf_vf_vf(fr, vcast_vf_f(4), vmulsign_vf_vf_vf(vcast_vf_f(0.5), x))))); fr = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(fr), vcast_vf_f(0.25)), vsub_vf_vf_vf(fr, vmulsign_vf_vf_vf(vcast_vf_f(0.5), x)), fr); fr = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(fr), vcast_vf_f(1e+10)), vcast_vf_f(0), fr); vopmask o = veq_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(0.12499999254941940308f)); fr = vsel_vf_vo_vf_vf(o, x, fr); vi = vsel_vi2_vo_vi2_vi2(o, vcast_vi2_i(0), vi); fi_t ret = { fr, vi }; #endif return ret; } static INLINE CONST dfi_t rempif(vfloat a) { vfloat2 x, y, z; vint2 ex = vilogb2k_vi2_vf(a); #if defined(ENABLE_AVX512F) ex = vandnot_vi2_vi2_vi2(vsra_vi2_vi2_i(ex, 31), ex); ex = vand_vi2_vi2_vi2(ex, vcast_vi2_i(127)); 
#endif ex = vsub_vi2_vi2_vi2(ex, vcast_vi2_i(25)); vint2 q = vand_vi2_vo_vi2(vgt_vo_vi2_vi2(ex, vcast_vi2_i(90-25)), vcast_vi2_i(-64)); a = vldexp3_vf_vf_vi2(a, q); ex = vandnot_vi2_vi2_vi2(vsra_vi2_vi2_i(ex, 31), ex); ex = vsll_vi2_vi2_i(ex, 2); x = dfmul_vf2_vf_vf(a, vgather_vf_p_vi2(rempitabsp, ex)); fi_t di = rempisubf(x.x); q = di.i; x.x = di.d; x = dfnormalize_vf2_vf2(x); y = dfmul_vf2_vf_vf(a, vgather_vf_p_vi2(rempitabsp+1, ex)); x = dfadd2_vf2_vf2_vf2(x, y); di = rempisubf(x.x); q = vadd_vi2_vi2_vi2(q, di.i); x.x = di.d; x = dfnormalize_vf2_vf2(x); y = vcast_vf2_vf_vf(vgather_vf_p_vi2(rempitabsp+2, ex), vgather_vf_p_vi2(rempitabsp+3, ex)); y = dfmul_vf2_vf2_vf(y, a); x = dfadd2_vf2_vf2_vf2(x, y); x = dfnormalize_vf2_vf2(x); x = dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(3.1415927410125732422f*2, -8.7422776573475857731e-08f*2)); x = vsel_vf2_vo_vf2_vf2(vlt_vo_vf_vf(vabs_vf_vf(a), vcast_vf_f(0.7f)), vcast_vf2_vf_vf(a, vcast_vf_f(0)), x); dfi_t ret = { x, q }; return ret; } EXPORT CONST vfloat xsinf(vfloat d) { vint2 q; vfloat u, s, r = d; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f), d); } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI))); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df), d); } else { dfi_t dfi = rempif(d); q = vand_vi2_vi2_vi2(dfi.i, vcast_vi2_i(3)); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(dfi.df.x, vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); q = vsra_vi2_vi2_i(q, 2); vopmask 
o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfi.i, vcast_vi2_i(1)), vcast_vi2_i(1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), dfi.df.x), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), dfi.df.x)); x = dfadd2_vf2_vf2_vf2(dfi.df, x); dfi.df = vsel_vf2_vo_vf2_vf2(o, x, dfi.df); d = vadd_vf_vf_vf(dfi.df.x, dfi.df.y); d = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(d))); } s = vmul_vf_vf_vf(d, d); d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(r), r, u); return u; } EXPORT CONST vfloat xcosf(vfloat d) { vint2 q; vfloat u, s, r = d; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), d); } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { q = vrint_vi2_vf(vsub_vf_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_1_PI)), vcast_vf_f(0.5f))); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vcast_vi2_i(1)); u = vcast_vf_vi2(q); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), d); d = vmla_vf_vf_vf_vf(u, 
vcast_vf_f(-PI_Cf*0.5f), d); d = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), d); } else { dfi_t dfi = rempif(d); q = vand_vi2_vi2_vi2(dfi.i, vcast_vi2_i(3)); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(dfi.df.x, vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); q = vsra_vi2_vi2_i(q, 1); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfi.i, vcast_vi2_i(1)), vcast_vi2_i(0)); vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(dfi.df.x, vcast_vf_f(0)), vcast_vf_f(0), vcast_vf_f(-1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); x = dfadd2_vf2_vf2_vf2(dfi.df, x); dfi.df = vsel_vf2_vo_vf2_vf2(o, x, dfi.df); d = vadd_vf_vf_vf(dfi.df.x, dfi.df.y); d = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(r), visnan_vo_vf(r)), vreinterpret_vm_vf(d))); } s = vmul_vf_vf_vf(d, d); d = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(d))); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833307858556509017944336f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666597127914428710938f)); u = vadd_vf_vf_vf(vmul_vf_vf_vf(s, vmul_vf_vf_vf(u, d)), d); return u; } EXPORT CONST vfloat xtanf(vfloat d) { vint2 q; vopmask o; vfloat u, s, x; x = d; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f*0.5f))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); u = vcast_vf_vi2(q); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x); } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { q = 
vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI)))); u = vcast_vf_vi2(q); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), x); x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), x); } else { dfi_t dfi = rempif(d); q = dfi.i; x = vadd_vf_vf_vf(dfi.df.x, dfi.df.y); x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(x))); x = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, x); } s = vmul_vf_vf_vf(x, x); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(x))); u = vcast_vf_f(0.00927245803177356719970703f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00331984995864331722259521f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0242998078465461730957031f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0534495301544666290283203f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.133383005857467651367188f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.333331853151321411132812f)); u = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(u, x), x); u = vsel_vf_vo_vf_vf(o, vrec_vf_vf(u), u); return u; } EXPORT CONST vfloat xsinf_u1(vfloat d) { vint2 q; vfloat u, v; vfloat2 s, t, x; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f))); } else { dfi_t dfi = rempif(d); q = vand_vi2_vi2_vi2(dfi.i, vcast_vi2_i(3)); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(dfi.df.x, vcast_vf_f(0)), vcast_vi2_i(2), vcast_vi2_i(1))); q = vsra_vi2_vi2_i(q, 2); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfi.i, vcast_vi2_i(1)), 
vcast_vi2_i(1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), dfi.df.x), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), dfi.df.x)); x = dfadd2_vf2_vf2_vf2(dfi.df, x); dfi.df = vsel_vf2_vo_vf2_vf2(o, x, dfi.df); s = dfnormalize_vf2_vf2(dfi.df); s.x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(s.x))); } t = s; s = dfsqu_vf2_vf2(s); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00833307858556509017944336f)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, s.x)), s)); u = dfmul_vf_vf2_vf2(t, x); u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u))); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); return u; } EXPORT CONST vfloat xcosf_u1(vfloat d) { vint2 q; vfloat u; vfloat2 s, t, x; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { vfloat dq = vmla_vf_vf_vf_vf(vrint_vf_vf(vmla_vf_vf_vf_vf(d, vcast_vf_f(M_1_PI), vcast_vf_f(-0.5f))), vcast_vf_f(2), vcast_vf_f(1)); q = vrint_vi2_vf(dq); s = dfadd2_vf2_vf_vf (d, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_A2f*0.5f))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(dq, vcast_vf_f(-PI_C2f*0.5f))); } else { dfi_t dfi = rempif(d); q = vand_vi2_vi2_vi2(dfi.i, vcast_vi2_i(3)); q = vadd_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, q), vsel_vi2_vo_vi2_vi2(vgt_vo_vf_vf(dfi.df.x, vcast_vf_f(0)), vcast_vi2_i(8), vcast_vi2_i(7))); q = vsra_vi2_vi2_i(q, 1); vopmask o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(dfi.i, vcast_vi2_i(1)), vcast_vi2_i(0)); vfloat y = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(dfi.df.x, vcast_vf_f(0)), 
vcast_vf_f(0), vcast_vf_f(-1)); vfloat2 x = vcast_vf2_vf_vf(vmulsign_vf_vf_vf(vcast_vf_f(3.1415927410125732422f*-0.5), y), vmulsign_vf_vf_vf(vcast_vf_f(-8.7422776573475857731e-08f*-0.5), y)); x = dfadd2_vf2_vf2_vf2(dfi.df, x); dfi.df = vsel_vf2_vo_vf2_vf2(o, x, dfi.df); s = dfnormalize_vf2_vf2(dfi.df); s.x = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(s.x))); } t = s; s = dfsqu_vf2_vf2(s); u = vcast_vf_f(2.6083159809786593541503e-06f); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.0001981069071916863322258f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00833307858556509017944336f)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf(vcast_vf_f(-0.166666597127914428710938f), vmul_vf_vf_vf(u, s.x)), s)); u = dfmul_vf_vf2_vf2(t, x); u = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)), vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(u))); return u; } #ifdef ENABLE_GNUABI #define TYPE2_FUNCATR static INLINE CONST #define TYPE6_FUNCATR static INLINE CONST #define SQRTFU05_FUNCATR static INLINE CONST #define XSINCOSF sincosfk #define XSINCOSF_U1 sincosfk_u1 #define XSINCOSPIF_U05 sincospifk_u05 #define XSINCOSPIF_U35 sincospifk_u35 #define XMODFF modffk #else #define TYPE2_FUNCATR EXPORT CONST #define TYPE6_FUNCATR EXPORT #define SQRTFU05_FUNCATR EXPORT #define XSINCOSF xsincosf #define XSINCOSF_U1 xsincosf_u1 #define XSINCOSPIF_U05 xsincospif_u05 #define XSINCOSPIF_U35 xsincospif_u35 #define XMODFF xmodff #endif TYPE2_FUNCATR vfloat2 XSINCOSF(vfloat d) { vint2 q; vopmask o; vfloat u, s, t, rx, ry; vfloat2 r; s = d; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI))); u = vcast_vf_vi2(q); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), s); s = vmla_vf_vf_vf_vf(u, 
vcast_vf_f(-PI_C2f*0.5f), s); } else if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAXf))))) { q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)M_2_PI))); u = vcast_vf_vi2(q); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Af*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Bf*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Cf*0.5f), s); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_Df*0.5f), s); } else { dfi_t dfi = rempif(d); q = dfi.i; s = vadd_vf_vf_vf(dfi.df.x, dfi.df.y); s = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)), vreinterpret_vm_vf(s))); } t = s; s = vmul_vf_vf_vf(s, s); u = vcast_vf_f(-0.000195169282960705459117889f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833215750753879547119141f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.166666537523269653320312f)); rx = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(u, s), t, t); rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx); u = vcast_vf_f(-2.71811842367242206819355e-07f); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(2.47990446951007470488548e-05f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.00138888787478208541870117f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416666641831398010253906f)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.5)); ry = vmla_vf_vf_vf_vf(s, u, vcast_vf_f(1)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); r.x = vsel_vf_vo_vf_vf(o, rx, ry); r.y = vsel_vf_vo_vf_vf(o, ry, rx); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); r.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.x))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); r.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.y))); return r; } TYPE2_FUNCATR vfloat2 XSINCOSF_U1(vfloat d) { vint2 q; vopmask o; vfloat u, v, rx, ry; vfloat2 r, s, t, 
x; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f))); } else { dfi_t dfi = rempif(d); q = dfi.i; s = dfi.df; o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)); s.x = vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(s.x))); } t = s; s.x = dfsqu_vf_vf2(s); u = vcast_vf_f(-0.000195169282960705459117889f); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00833215750753879547119141f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.166666537523269653320312f)); u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(s.x, t.x)); x = dfadd_vf2_vf2_vf(t, u); rx = vadd_vf_vf_vf(x.x, x.y); rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx); u = vcast_vf_f(-2.71811842367242206819355e-07f); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(2.47990446951007470488548e-05f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.00138888787478208541870117f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0416666641831398010253906f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-0.5)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(s.x, u)); ry = vadd_vf_vf_vf(x.x, x.y); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(0)); r.x = vsel_vf_vo_vf_vf(o, rx, ry); r.y = vsel_vf_vo_vf_vf(o, ry, rx); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); r.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.x))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(2)), vcast_vi2_i(2)); r.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.y))); return r; } TYPE2_FUNCATR vfloat2 XSINCOSPIF_U05(vfloat d) { vopmask o; 
vfloat u, s, t, rx, ry; vfloat2 r, x, s2; u = vmul_vf_vf_vf(d, vcast_vf_f(4)); vint2 q = vtruncate_vi2_vf(u); q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1)); s = vsub_vf_vf_vf(u, vcast_vf_vi2(q)); t = s; s = vmul_vf_vf_vf(s, s); s2 = dfmul_vf2_vf_vf(t, t); // u = vcast_vf_f(+0.3093842054e-6); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3657307388e-4)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2490393585e-2)); x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), vcast_vf2_f_f(-0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), vcast_vf2_f_f(0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_vf2_vf2_vf(x, t); rx = vadd_vf_vf_vf(x.x, x.y); rx = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), rx); // u = vcast_vf_f(-0.2430611801e-7); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.3590577080e-5)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3259917721e-3)); x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), vcast_vf2_f_f(0.015854343771934509277, 4.4940051354032242811e-10)); x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), vcast_vf2_f_f(-0.30842512845993041992, -9.0728339030733922277e-09)); x = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(x, s2), vcast_vf_f(1)); ry = vadd_vf_vf_vf(x.x, x.y); // o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)); r.x = vsel_vf_vo_vf_vf(o, rx, ry); r.y = vsel_vf_vo_vf_vf(o, ry, rx); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4)); r.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.x))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4)); r.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.y))); o = vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1e+7f)); r.x = vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, 
vreinterpret_vm_vf(r.x))); r.y = vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(r.y))); o = visinf_vo_vf(d); r.x = vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(r.x))); r.y = vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(r.y))); return r; } TYPE2_FUNCATR vfloat2 XSINCOSPIF_U35(vfloat d) { vopmask o; vfloat u, s, t, rx, ry; vfloat2 r; u = vmul_vf_vf_vf(d, vcast_vf_f(4)); vint2 q = vtruncate_vi2_vf(u); q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1)); s = vsub_vf_vf_vf(u, vcast_vf_vi2(q)); t = s; s = vmul_vf_vf_vf(s, s); // u = vcast_vf_f(-0.3600925265e-4); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2490088111e-2)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.8074551076e-1)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.7853981853e+0)); rx = vmul_vf_vf_vf(u, t); // u = vcast_vf_f(+0.3539815225e-5); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3259574005e-3)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1585431583e-1)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(-0.3084251285e+0)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(1)); ry = u; // o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)); r.x = vsel_vf_vo_vf_vf(o, rx, ry); r.y = vsel_vf_vo_vf_vf(o, ry, rx); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4)); r.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.x))); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4)); r.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(r.y))); o = vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1e+7f)); r.x = vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(r.x))); r.y = vreinterpret_vf_vm(vandnot_vm_vo32_vm(o, vreinterpret_vm_vf(r.y))); o = visinf_vo_vf(d); r.x = vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(r.x))); r.y = 
vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(r.y))); return r; } TYPE6_FUNCATR vfloat2 XMODFF(vfloat x) { vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x))); fr = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(1LL << 23)), vcast_vf_f(0), fr); vfloat2 ret; ret.x = vcopysign_vf_vf_vf(fr, x); ret.y = vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x); return ret; } #ifdef ENABLE_GNUABI EXPORT void xsincosf(vfloat a, float *ps, float *pc) { vfloat2 r = sincosfk(a); vstoreu_v_p_vf(ps, r.x); vstoreu_v_p_vf(pc, r.y); } EXPORT void xsincosf_u1(vfloat a, float *ps, float *pc) { vfloat2 r = sincosfk_u1(a); vstoreu_v_p_vf(ps, r.x); vstoreu_v_p_vf(pc, r.y); } EXPORT void xsincospif_u05(vfloat a, float *ps, float *pc) { vfloat2 r = sincospifk_u05(a); vstoreu_v_p_vf(ps, r.x); vstoreu_v_p_vf(pc, r.y); } EXPORT void xsincospif_u35(vfloat a, float *ps, float *pc) { vfloat2 r = sincospifk_u35(a); vstoreu_v_p_vf(ps, r.x); vstoreu_v_p_vf(pc, r.y); } EXPORT CONST vfloat xmodff(vfloat a, float *iptr) { vfloat2 r = modffk(a); vstoreu_v_p_vf(iptr, r.y); return r.x; } #endif // #ifdef ENABLE_GNUABI EXPORT CONST vfloat xtanf_u1(vfloat d) { vint2 q; vfloat u, v; vfloat2 s, t, x; vopmask o; if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f))))) { u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(2 * M_1_PI))); q = vrint_vi2_vf(u); v = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d); s = dfadd2_vf2_vf_vf(v, vmul_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f))); } else { dfi_t dfi = rempif(d); q = dfi.i; s = dfi.df; o = vor_vo_vo_vo(visinf_vo_vf(d), visnan_vo_vf(d)); s.x = vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(s.x))); s.y = vreinterpret_vf_vm(vor_vm_vo32_vm(o, vreinterpret_vm_vf(s.y))); } o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)); vmask n = vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))); s.x = 
vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(s.x), n)); s.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(s.y), n)); t = s; s = dfsqu_vf2_vf2(s); s = dfnormalize_vf2_vf2(s); u = vcast_vf_f(0.00446636462584137916564941f); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(-8.3920182078145444393158e-05f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0109639242291450500488281f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0212360303848981857299805f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0540687143802642822265625f)); x = dfadd_vf2_vf_vf(vcast_vf_f(0.133325666189193725585938f), vmul_vf_vf_vf(u, s.x)); x = dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(0.33333361148834228515625f), dfmul_vf2_vf2_vf2(s, x)), s)); x = dfmul_vf2_vf2_vf2(t, x); x = vsel_vf2_vo_vf2_vf2(o, dfrec_vf2_vf2(x), x); u = vadd_vf_vf_vf(x.x, x.y); u = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), d, u); return u; } EXPORT CONST vfloat xatanf(vfloat d) { vfloat s, t, u; vint2 q; q = vsel_vi2_vf_vi2(d, vcast_vi2_i(2)); s = vabs_vf_vf(d); q = vsel_vi2_vf_vf_vi2_vi2(vcast_vf_f(1.0f), s, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); s = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vcast_vf_f(1.0f), s), vrec_vf_vf(s), s); t = vmul_vf_vf_vf(s, s); u = vcast_vf_f(0.00282363896258175373077393f); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0159569028764963150024414f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.0425049886107444763183594f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0748900920152664184570312f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.106347933411598205566406f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.142027363181114196777344f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.199926957488059997558594f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.333331018686294555664062f)); t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s); t = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(1)), vcast_vi2_i(1)), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), t), t); t = 
vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(t))); #ifdef ENABLE_NEON32 t = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(1.5874010519681994747517056f), d), t); #endif return t; } static INLINE CONST vfloat atan2kf(vfloat y, vfloat x) { vfloat s, t, u; vint2 q; vopmask p; q = vsel_vi2_vf_vi2(x, vcast_vi2_i(-2)); x = vabs_vf_vf(x); q = vsel_vi2_vf_vf_vi2_vi2(x, y, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); p = vlt_vo_vf_vf(x, y); s = vsel_vf_vo_vf_vf(p, vneg_vf_vf(x), y); t = vmax_vf_vf_vf(x, y); s = vdiv_vf_vf_vf(s, t); t = vmul_vf_vf_vf(s, s); u = vcast_vf_f(0.00282363896258175373077393f); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0159569028764963150024414f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.0425049886107444763183594f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.0748900920152664184570312f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.106347933411598205566406f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.142027363181114196777344f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(0.199926957488059997558594f)); u = vmla_vf_vf_vf_vf(u, t, vcast_vf_f(-0.333331018686294555664062f)); t = vmla_vf_vf_vf_vf(s, vmul_vf_vf_vf(t, u), s); t = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f((float)(M_PI/2)), t); return t; } static INLINE CONST vfloat visinf2_vf_vf_vf(vfloat d, vfloat m) { return vreinterpret_vf_vm(vand_vm_vo32_vm(visinf_vo_vf(d), vor_vm_vm_vm(vsignbit_vm_vf(d), vreinterpret_vm_vf(m)))); } EXPORT CONST vfloat xatan2f(vfloat y, vfloat x) { vfloat r = atan2kf(vabs_vf_vf(y), x); r = vmulsign_vf_vf_vf(r, x); r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0.0f))), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), x))), r); r = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vsub_vf_vf_vf(vcast_vf_f((float)(M_PI/2)), visinf2_vf_vf_vf(x, 
vmulsign_vf_vf_vf(vcast_vf_f((float)(M_PI/4)), x))), r); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(0.0f)), vreinterpret_vf_vm(vand_vm_vo32_vm(vsignbit_vo_vf(x), vreinterpret_vm_vf(vcast_vf_f((float)M_PI)))), r); r = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(vmulsign_vf_vf_vf(r, y)))); return r; } EXPORT CONST vfloat xasinf(vfloat d) { vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f)); vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))); vfloat x = vsel_vf_vo_vf_vf(o, vabs_vf_vf(d), vsqrt_vf_vf(x2)), u; u = vcast_vf_f(+0.4197454825e-1); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0)); u = vmla_vf_vf_vf_vf(u, vmul_vf_vf_vf(x, x2), x); vfloat r = vsel_vf_vo_vf_vf(o, u, vmla_vf_vf_vf_vf(u, vcast_vf_f(-2), vcast_vf_f(M_PIf/2))); return vmulsign_vf_vf_vf(r, d); } EXPORT CONST vfloat xacosf(vfloat d) { vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f)); vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u; vfloat x = vsel_vf_vo_vf_vf(o, vabs_vf_vf(d), vsqrt_vf_vf(x2)); x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1.0f)), vcast_vf_f(0), x); u = vcast_vf_f(+0.4197454825e-1); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0)); u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, x)); vfloat y = vsub_vf_vf_vf(vcast_vf_f(3.1415926535897932f/2), vadd_vf_vf_vf(vmulsign_vf_vf_vf(x, d), vmulsign_vf_vf_vf(u, d))); x = vadd_vf_vf_vf(x, u); vfloat r = vsel_vf_vo_vf_vf(o, y, 
vmul_vf_vf_vf(x, vcast_vf_f(2))); return vsel_vf_vo_vf_vf(vandnot_vo_vo_vo(o, vlt_vo_vf_vf(d, vcast_vf_f(0))), dfadd_vf2_vf2_vf(vcast_vf2_f_f(3.1415927410125732422f,-8.7422776573475857731e-08f), vneg_vf_vf(r)).x, r); } // static INLINE CONST vfloat2 atan2kf_u1(vfloat2 y, vfloat2 x) { vfloat u; vfloat2 s, t; vint2 q; vopmask p; vmask r; q = vsel_vi2_vf_vf_vi2_vi2(x.x, vcast_vf_f(0), vcast_vi2_i(-2), vcast_vi2_i(0)); p = vlt_vo_vf_vf(x.x, vcast_vf_f(0)); r = vand_vm_vo32_vm(p, vreinterpret_vm_vf(vcast_vf_f(-0.0))); x.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x.x), r)); x.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vreinterpret_vm_vf(x.y), r)); q = vsel_vi2_vf_vf_vi2_vi2(x.x, y.x, vadd_vi2_vi2_vi2(q, vcast_vi2_i(1)), q); p = vlt_vo_vf_vf(x.x, y.x); s = vsel_vf2_vo_vf2_vf2(p, dfneg_vf2_vf2(x), y); t = vsel_vf2_vo_vf2_vf2(p, y, x); s = dfdiv_vf2_vf2_vf2(s, t); t = dfsqu_vf2_vf2(s); t = dfnormalize_vf2_vf2(t); u = vcast_vf_f(-0.00176397908944636583328247f); u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(0.0107900900766253471374512f)); u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(-0.0309564601629972457885742f)); u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(0.0577365085482597351074219f)); u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(-0.0838950723409652709960938f)); u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(0.109463557600975036621094f)); u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(-0.142626821994781494140625f)); u = vmla_vf_vf_vf_vf(u, t.x, vcast_vf_f(0.199983194470405578613281f)); t = dfmul_vf2_vf2_vf2(t, dfadd_vf2_vf_vf(vcast_vf_f(-0.333332866430282592773438f), vmul_vf_vf_vf(u, t.x))); t = dfmul_vf2_vf2_vf2(s, dfadd_vf2_vf_vf2(vcast_vf_f(1), t)); t = dfadd_vf2_vf2_vf2(dfmul_vf2_vf2_vf(vcast_vf2_f_f(1.5707963705062866211f, -4.3711388286737928865e-08f), vcast_vf_vi2(q)), t); return t; } EXPORT CONST vfloat xatan2f_u1(vfloat y, vfloat x) { vopmask o = vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(2.9387372783541830947e-39f)); // nexttowardf((1.0 / FLT_MAX), 1) x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, 
vcast_vf_f(1 << 24)), x); y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(1 << 24)), y); vfloat2 d = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(y), vcast_vf_f(0)), vcast_vf2_vf_vf(x, vcast_vf_f(0))); vfloat r = vadd_vf_vf_vf(d.x, d.y); r = vmulsign_vf_vf_vf(r, x); r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0))), vsub_vf_vf_vf(vcast_vf_f(M_PI/2), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f(M_PI/2), x))), r); r = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vsub_vf_vf_vf(vcast_vf_f(M_PI/2), visinf2_vf_vf_vf(x, vmulsign_vf_vf_vf(vcast_vf_f(M_PI/4), x))), r); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(0.0f)), vreinterpret_vf_vm(vand_vm_vo32_vm(vsignbit_vo_vf(x), vreinterpret_vm_vf(vcast_vf_f((float)M_PI)))), r); r = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(vmulsign_vf_vf_vf(r, y)))); return r; } EXPORT CONST vfloat xasinf_u1(vfloat d) { vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(0.5f)); vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u; vfloat2 x = vsel_vf2_vo_vf2_vf2(o, vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), dfsqrt_vf2_vf(x2)); x = vsel_vf2_vo_vf2_vf2(veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1.0f)), vcast_vf2_f_f(0, 0), x); u = vcast_vf_f(+0.4197454825e-1); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0)); u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, x.x)); vfloat2 y = dfsub_vf2_vf2_vf(dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f/4,-8.7422776573475857731e-08f/4), x), u); vfloat r = vsel_vf_vo_vf_vf(o, vadd_vf_vf_vf(u, x.x), vmul_vf_vf_vf(vadd_vf_vf_vf(y.x, y.y), vcast_vf_f(2))); return vmulsign_vf_vf_vf(r, d); } EXPORT CONST vfloat xacosf_u1(vfloat d) { vopmask o = vlt_vo_vf_vf(vabs_vf_vf(d), 
vcast_vf_f(0.5f)); vfloat x2 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, d), vmul_vf_vf_vf(vsub_vf_vf_vf(vcast_vf_f(1), vabs_vf_vf(d)), vcast_vf_f(0.5f))), u; vfloat2 x = vsel_vf2_vo_vf2_vf2(o, vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), dfsqrt_vf2_vf(x2)); x = vsel_vf2_vo_vf2_vf2(veq_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1.0f)), vcast_vf2_f_f(0, 0), x); u = vcast_vf_f(+0.4197454825e-1); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.2424046025e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.4547423869e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.7495029271e-1)); u = vmla_vf_vf_vf_vf(u, x2, vcast_vf_f(+0.1666677296e+0)); u = vmul_vf_vf_vf(u, vmul_vf_vf_vf(x2, x.x)); vfloat2 y = dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f/2, -8.7422776573475857731e-08f/2), dfadd_vf2_vf_vf(vmulsign_vf_vf_vf(x.x, d), vmulsign_vf_vf_vf(u, d))); x = dfadd_vf2_vf2_vf(x, u); y = vsel_vf2_vo_vf2_vf2(o, y, dfscale_vf2_vf2_vf(x, vcast_vf_f(2))); y = vsel_vf2_vo_vf2_vf2(vandnot_vo_vo_vo(o, vlt_vo_vf_vf(d, vcast_vf_f(0))), dfsub_vf2_vf2_vf2(vcast_vf2_f_f(3.1415927410125732422f, -8.7422776573475857731e-08f), y), y); return vadd_vf_vf_vf(y.x, y.y); } EXPORT CONST vfloat xatanf_u1(vfloat d) { vfloat2 d2 = atan2kf_u1(vcast_vf2_vf_vf(vabs_vf_vf(d), vcast_vf_f(0)), vcast_vf2_f_f(1, 0)); vfloat r = vadd_vf_vf_vf(d2.x, d2.y); r = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vcast_vf_f(1.570796326794896557998982), r); return vmulsign_vf_vf_vf(r, d); } // EXPORT CONST vfloat xlogf(vfloat d) { vfloat x, x2, t, m; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN)); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(1LL << 32) * (float)(1LL << 32))), d); vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f))); m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e)); e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); #else vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f))); e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); m = 
vgetmant_vf_vf(d); #endif x = vdiv_vf_vf_vf(vadd_vf_vf_vf(vcast_vf_f(-1.0f), m), vadd_vf_vf_vf(vcast_vf_f(1.0f), m)); x2 = vmul_vf_vf_vf(x, x); t = vcast_vf_f(0.2392828464508056640625f); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.28518211841583251953125f)); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.400005877017974853515625f)); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(0.666666686534881591796875f)); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(2.0f)); #ifndef ENABLE_AVX512F x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), vcast_vf_vi2(e))); x = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), x); x = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NANf), x); x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITYf), x); #else x = vmla_vf_vf_vf_vf(x, t, vmul_vf_vf_vf(vcast_vf_f(0.693147180559945286226764f), e)); x = vfixup_vf_vf_vf_vi2_i(x, d, vcast_vi2_i((5 << (5*4))), 0); #endif return x; } EXPORT CONST vfloat xexpf(vfloat d) { vint2 q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f))); vfloat s, u; s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d); s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s); u = vcast_vf_f(0.000198527617612853646278381); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00139304355252534151077271)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833336077630519866943359)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416664853692054748535156)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166666671633720397949219)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.5)); u = vadd_vf_vf_vf(vcast_vf_f(1.0f), vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s)); u = vldexp2_vf_vf_vi2(u, q); u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-104)), vreinterpret_vm_vf(u))); u = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vcast_vf_f(100), d), vcast_vf_f(SLEEF_INFINITYf), u); return u; } static INLINE CONST vfloat expm1fk(vfloat d) { vint2 q = 
vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(R_LN2f))); vfloat s, u; s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf), d); s = vmla_vf_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf), s); u = vcast_vf_f(0.000198527617612853646278381); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00139304355252534151077271)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.00833336077630519866943359)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.0416664853692054748535156)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.166666671633720397949219)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(0.5)); u = vmla_vf_vf_vf_vf(vmul_vf_vf_vf(s, s), u, s); u = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(q, vcast_vi2_i(0)), u, vsub_vf_vf_vf(vldexp2_vf_vf_vi2(vadd_vf_vf_vf(u, vcast_vf_f(1)), q), vcast_vf_f(1))); return u; } #ifdef ENABLE_NEON32 EXPORT CONST vfloat xsqrtf_u35(vfloat d) { vfloat e = vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vcast_vi2_i(0x20000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x7f000000), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(d), 1)))); vfloat m = vreinterpret_vf_vi2(vadd_vi2_vi2_vi2(vcast_vi2_i(0x3f000000), vand_vi2_vi2_vi2(vcast_vi2_i(0x01ffffff), vreinterpret_vi2_vf(d)))); float32x4_t x = vrsqrteq_f32(m); x = vmulq_f32(x, vrsqrtsq_f32(m, vmulq_f32(x, x))); float32x4_t u = vmulq_f32(x, m); u = vmlaq_f32(u, vmlsq_f32(m, u, u), vmulq_f32(x, vdupq_n_f32(0.5))); e = vreinterpret_vf_vm(vandnot_vm_vo32_vm(veq_vo_vf_vf(d, vcast_vf_f(0)), vreinterpret_vm_vf(e))); u = vmul_vf_vf_vf(e, u); u = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), u); u = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(d), vlt_vo_vf_vf(d, vcast_vf_f(0))), vreinterpret_vm_vf(u))); u = vmulsign_vf_vf_vf(u, d); return u; } #elif defined(ENABLE_VECEXT) EXPORT CONST vfloat xsqrtf_u35(vfloat d) { vfloat q = vsqrt_vf_vf(d); q = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0), q); return vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), q); } #else EXPORT CONST vfloat xsqrtf_u35(vfloat d) { return vsqrt_vf_vf(d); } #endif 
EXPORT CONST vfloat xcbrtf(vfloat d) { vfloat x, y, q = vcast_vf_f(1.0), t; vint2 e, qu, re; #ifdef ENABLE_AVX512F vfloat s = d; #endif e = vadd_vi2_vi2_vi2(vilogbk_vi2_vf(vabs_vf_vf(d)), vcast_vi2_i(1)); d = vldexp2_vf_vf_vi2(d, vneg_vi2_vi2(e)); t = vadd_vf_vf_vf(vcast_vf_vi2(e), vcast_vf_f(6144)); qu = vtruncate_vi2_vf(vmul_vf_vf_vf(t, vcast_vf_f(1.0f/3.0f))); re = vtruncate_vi2_vf(vsub_vf_vf_vf(t, vmul_vf_vf_vf(vcast_vf_vi2(qu), vcast_vf_f(3)))); q = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(re, vcast_vi2_i(1)), vcast_vf_f(1.2599210498948731647672106f), q); q = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(re, vcast_vi2_i(2)), vcast_vf_f(1.5874010519681994747517056f), q); q = vldexp2_vf_vf_vi2(q, vsub_vi2_vi2_vi2(qu, vcast_vi2_i(2048))); q = vmulsign_vf_vf_vf(q, d); d = vabs_vf_vf(d); x = vcast_vf_f(-0.601564466953277587890625f); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.8208892345428466796875f)); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-5.532182216644287109375f)); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(5.898262500762939453125f)); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-3.8095417022705078125f)); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.2241256237030029296875f)); y = vmul_vf_vf_vf(vmul_vf_vf_vf(d, x), x); y = vmul_vf_vf_vf(vsub_vf_vf_vf(y, vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(2.0f / 3.0f), y), vmla_vf_vf_vf_vf(y, x, vcast_vf_f(-1.0f)))), q); #ifdef ENABLE_AVX512F y = vsel_vf_vo_vf_vf(visinf_vo_vf(s), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), s), y); y = vsel_vf_vo_vf_vf(veq_vo_vf_vf(s, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), s), y); #endif return y; } EXPORT CONST vfloat xcbrtf_u1(vfloat d) { vfloat x, y, z, t; vfloat2 q2 = vcast_vf2_f_f(1, 0), u, v; vint2 e, qu, re; #ifdef ENABLE_AVX512F vfloat s = d; #endif e = vadd_vi2_vi2_vi2(vilogbk_vi2_vf(vabs_vf_vf(d)), vcast_vi2_i(1)); d = vldexp2_vf_vf_vi2(d, vneg_vi2_vi2(e)); t = vadd_vf_vf_vf(vcast_vf_vi2(e), vcast_vf_f(6144)); qu = vtruncate_vi2_vf(vmul_vf_vf_vf(t, vcast_vf_f(1.0/3.0))); re = vtruncate_vi2_vf(vsub_vf_vf_vf(t, 
vmul_vf_vf_vf(vcast_vf_vi2(qu), vcast_vf_f(3)))); q2 = vsel_vf2_vo_vf2_vf2(veq_vo_vi2_vi2(re, vcast_vi2_i(1)), vcast_vf2_f_f(1.2599210739135742188f, -2.4018701694217270415e-08), q2); q2 = vsel_vf2_vo_vf2_vf2(veq_vo_vi2_vi2(re, vcast_vi2_i(2)), vcast_vf2_f_f(1.5874010324478149414f, 1.9520385308169352356e-08), q2); q2.x = vmulsign_vf_vf_vf(q2.x, d); q2.y = vmulsign_vf_vf_vf(q2.y, d); d = vabs_vf_vf(d); x = vcast_vf_f(-0.601564466953277587890625f); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.8208892345428466796875f)); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-5.532182216644287109375f)); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(5.898262500762939453125f)); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(-3.8095417022705078125f)); x = vmla_vf_vf_vf_vf(x, d, vcast_vf_f(2.2241256237030029296875f)); y = vmul_vf_vf_vf(x, x); y = vmul_vf_vf_vf(y, y); x = vsub_vf_vf_vf(x, vmul_vf_vf_vf(vmlanp_vf_vf_vf_vf(d, y, x), vcast_vf_f(-1.0 / 3.0))); z = x; u = dfmul_vf2_vf_vf(x, x); u = dfmul_vf2_vf2_vf2(u, u); u = dfmul_vf2_vf2_vf(u, d); u = dfadd2_vf2_vf2_vf(u, vneg_vf_vf(x)); y = vadd_vf_vf_vf(u.x, u.y); y = vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(-2.0 / 3.0), y), z); v = dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(z, z), y); v = dfmul_vf2_vf2_vf(v, d); v = dfmul_vf2_vf2_vf2(v, q2); z = vldexp2_vf_vf_vi2(vadd_vf_vf_vf(v.x, v.y), vsub_vi2_vi2_vi2(qu, vcast_vi2_i(2048))); z = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), q2.x), z); z = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vreinterpret_vf_vm(vsignbit_vm_vf(q2.x)), z); #ifdef ENABLE_AVX512F z = vsel_vf_vo_vf_vf(visinf_vo_vf(s), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), s), z); z = vsel_vf_vo_vf_vf(veq_vo_vf_vf(s, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), s), z); #endif return z; } static INLINE CONST vfloat2 logkf(vfloat d) { vfloat2 x, x2; vfloat t, m; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN)); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(1LL << 32) * (float)(1LL 
<< 32))), d); vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f))); m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e)); e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); #else vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f))); e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); m = vgetmant_vf_vf(d); #endif x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m)); x2 = dfsqu_vf2_vf2(x); t = vcast_vf_f(0.240320354700088500976562); t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.285112679004669189453125)); t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.400007992982864379882812)); vfloat2 c = vcast_vf2_f_f(0.66666662693023681640625f, 3.69183861259614332084311e-09f); #ifndef ENABLE_AVX512F vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e)); #else vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e); #endif s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2))); s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(x2, x), dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf(x2, t), c))); return s; } EXPORT CONST vfloat xlogf_u1(vfloat d) { vfloat2 x; vfloat t, m, x2; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN)); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(1LL << 32) * (float)(1LL << 32))), d); vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f))); m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e)); e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e)); #else vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0f/0.75f))); e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); m = vgetmant_vf_vf(d); vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e); 
#endif x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m)); x2 = vmul_vf_vf_vf(x.x, x.x); t = vcast_vf_f(+0.3027294874e+0f); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.3996108174e+0f)); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.6666694880e+0f)); s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, x.x), t)); vfloat r = vadd_vf_vf_vf(s.x, s.y); #ifndef ENABLE_AVX512F r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), r); r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NANf), r); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITYf), r); #else r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); #endif return r; } static INLINE CONST vfloat expkf(vfloat2 d) { vfloat u = vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(R_LN2f)); vint2 q = vrint_vi2_vf(u); vfloat2 s, t; s = dfadd2_vf2_vf2_vf(d, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf))); s = dfnormalize_vf2_vf2(s); u = vcast_vf_f(0.00136324646882712841033936f); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.00836596917361021041870117f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.0416710823774337768554688f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.166665524244308471679688f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(0.499999850988388061523438f)); t = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf(dfsqu_vf2_vf2(s), u)); t = dfadd_vf2_vf_vf2(vcast_vf_f(1), t); u = vadd_vf_vf_vf(t.x, t.y); u = vldexp_vf_vf_vi2(u, q); u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d.x, vcast_vf_f(-104)), vreinterpret_vm_vf(u))); return u; } EXPORT CONST vfloat xpowf(vfloat x, vfloat y) { #if 1 vopmask yisint = vor_vo_vo_vo(veq_vo_vf_vf(vtruncate_vf_vf(y), y), vgt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 
24))); vopmask yisodd = vand_vo_vo_vo(vand_vo_vo_vo(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vtruncate_vi2_vf(y), vcast_vi2_i(1)), vcast_vi2_i(1)), yisint), vlt_vo_vf_vf(vabs_vf_vf(y), vcast_vf_f(1 << 24))); #ifdef ENABLE_NEON32 yisodd = vandnot_vm_vo32_vm(visinf_vo_vf(y), yisodd); #endif vfloat result = expkf(dfmul_vf2_vf2_vf(logkf(vabs_vf_vf(x)), y)); result = vsel_vf_vo_vf_vf(visnan_vo_vf(result), vcast_vf_f(SLEEF_INFINITYf), result); result = vmul_vf_vf_vf(result, vsel_vf_vo_vf_vf(vgt_vo_vf_vf(x, vcast_vf_f(0)), vcast_vf_f(1), vsel_vf_vo_vf_vf(yisint, vsel_vf_vo_vf_vf(yisodd, vcast_vf_f(-1.0f), vcast_vf_f(1)), vcast_vf_f(SLEEF_NANf)))); vfloat efx = vmulsign_vf_vf_vf(vsub_vf_vf_vf(vabs_vf_vf(x), vcast_vf_f(1)), y); result = vsel_vf_vo_vf_vf(visinf_vo_vf(y), vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(efx, vcast_vf_f(0.0f)), vreinterpret_vm_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(efx, vcast_vf_f(0.0f)), vcast_vf_f(1.0f), vcast_vf_f(SLEEF_INFINITYf))))), result); result = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), veq_vo_vf_vf(x, vcast_vf_f(0))), vmul_vf_vf_vf(vsel_vf_vo_vf_vf(yisodd, vsign_vf_vf(x), vcast_vf_f(1)), vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vneg_vf_vf(y), y), vcast_vf_f(0)), vreinterpret_vm_vf(vcast_vf_f(SLEEF_INFINITYf))))), result); result = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(result))); result = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(y, vcast_vf_f(0)), veq_vo_vf_vf(x, vcast_vf_f(1))), vcast_vf_f(1), result); return result; #else return expkf(dfmul_vf2_vf2_vf(logkf(x), y)); #endif } static INLINE CONST vfloat2 expk2f(vfloat2 d) { vfloat u = vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(R_LN2f)); vint2 q = vrint_vi2_vf(u); vfloat2 s, t; s = dfadd2_vf2_vf2_vf(d, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Uf))); s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vcast_vf_vi2(q), vcast_vf_f(-L2Lf))); u = vcast_vf_f(+0.1980960224e-3f); 
u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(+0.1394256484e-2f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(+0.8333456703e-2f)); u = vmla_vf_vf_vf_vf(u, s.x, vcast_vf_f(+0.4166637361e-1f)); t = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(s, u), vcast_vf_f(+0.166666659414234244790680580464e+0f)); t = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(s, t), vcast_vf_f(0.5)); t = dfadd2_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(dfsqu_vf2_vf2(s), t)); t = dfadd_vf2_vf_vf2(vcast_vf_f(1), t); t.x = vldexp2_vf_vf_vi2(t.x, q); t.y = vldexp2_vf_vf_vi2(t.y, q); t.x = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d.x, vcast_vf_f(-104)), vreinterpret_vm_vf(t.x))); t.y = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d.x, vcast_vf_f(-104)), vreinterpret_vm_vf(t.y))); return t; } EXPORT CONST vfloat xsinhf(vfloat x) { vfloat y = vabs_vf_vf(x); vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0))); d = dfsub_vf2_vf2_vf2(d, dfrec_vf2_vf2(d)); y = vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(0.5)); y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); y = vmulsign_vf_vf_vf(y, x); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; } EXPORT CONST vfloat xcoshf(vfloat x) { vfloat y = vabs_vf_vf(x); vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0))); d = dfadd_vf2_vf2_vf2(d, dfrec_vf2_vf2(d)); y = vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(0.5)); y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(89)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; } EXPORT CONST vfloat xtanhf(vfloat x) { vfloat y = vabs_vf_vf(x); vfloat2 d = expk2f(vcast_vf2_vf_vf(y, vcast_vf_f(0))); vfloat2 e = dfrec_vf2_vf2(d); d = dfdiv_vf2_vf2_vf2(dfadd_vf2_vf2_vf2(d, dfneg_vf2_vf2(e)), dfadd_vf2_vf2_vf2(d, e)); y = vadd_vf_vf_vf(d.x, d.y); y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), 
vcast_vf_f(8.664339742f)), visnan_vo_vf(y)), vcast_vf_f(1.0f), y); y = vmulsign_vf_vf_vf(y, x); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; } EXPORT CONST vfloat xsinhf_u35(vfloat x) { vfloat e = expm1fk(vabs_vf_vf(x)); vfloat y = vdiv_vf_vf_vf(vadd_vf_vf_vf(e, vcast_vf_f(2)), vadd_vf_vf_vf(e, vcast_vf_f(1))); y = vmul_vf_vf_vf(y, vmul_vf_vf_vf(vcast_vf_f(0.5f), e)); y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(88)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); y = vmulsign_vf_vf_vf(y, x); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; } EXPORT CONST vfloat xcoshf_u35(vfloat x) { vfloat e = xexpf(vabs_vf_vf(x)); vfloat y = vmla_vf_vf_vf_vf(vcast_vf_f(0.5f), e, vdiv_vf_vf_vf(vcast_vf_f(0.5), e)); y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(88)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; } EXPORT CONST vfloat xtanhf_u35(vfloat x) { vfloat d = expm1fk(vmul_vf_vf_vf(vcast_vf_f(2), vabs_vf_vf(x))); vfloat y = vdiv_vf_vf_vf(d, vadd_vf_vf_vf(vcast_vf_f(2), d)); y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(8.664339742f)), visnan_vo_vf(y)), vcast_vf_f(1.0f), y); y = vmulsign_vf_vf_vf(y, x); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; } static INLINE CONST vfloat2 logk2f(vfloat2 d) { vfloat2 x, x2, m, s; vfloat t; vint2 e; #ifndef ENABLE_AVX512F e = vilogbk_vi2_vf(vmul_vf_vf_vf(d.x, vcast_vf_f(1.0f/0.75f))); #else e = vrint_vi2_vf(vgetexp_vf_vf(vmul_vf_vf_vf(d.x, vcast_vf_f(1.0f/0.75f)))); #endif m = dfscale_vf2_vf2_vf(d, vpow2i_vf_vi2(vneg_vi2_vi2(e))); x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(m, vcast_vf_f(-1)), dfadd2_vf2_vf2_vf(m, vcast_vf_f(1))); x2 = dfsqu_vf2_vf2(x); t = vcast_vf_f(0.2392828464508056640625f); t = vmla_vf_vf_vf_vf(t, x2.x, 
vcast_vf_f(0.28518211841583251953125f)); t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.400005877017974853515625f)); t = vmla_vf_vf_vf_vf(t, x2.x, vcast_vf_f(0.666666686534881591796875f)); s = dfmul_vf2_vf2_vf(vcast_vf2_vf_vf(vcast_vf_f(0.69314718246459960938f), vcast_vf_f(-1.904654323148236017e-09f)), vcast_vf_vi2(e)); s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2))); s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf(dfmul_vf2_vf2_vf2(x2, x), t)); return s; } EXPORT CONST vfloat xasinhf(vfloat x) { vfloat y = vabs_vf_vf(x); vopmask o = vgt_vo_vf_vf(y, vcast_vf_f(1)); vfloat2 d; d = vsel_vf2_vo_vf2_vf2(o, dfrec_vf2_vf(x), vcast_vf2_vf_vf(y, vcast_vf_f(0))); d = dfsqrt_vf2_vf2(dfadd2_vf2_vf2_vf(dfsqu_vf2_vf2(d), vcast_vf_f(1))); d = vsel_vf2_vo_vf2_vf2(o, dfmul_vf2_vf2_vf(d, y), d); d = logk2f(dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf(d, x))); y = vadd_vf_vf_vf(d.x, d.y); y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(SQRT_FLT_MAX)), visnan_vo_vf(y)), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), x), y); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); y = vsel_vf_vo_vf_vf(visnegzero_vo_vf(x), vcast_vf_f(-0.0), y); return y; } EXPORT CONST vfloat xacoshf(vfloat x) { vfloat2 d = logk2f(dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfsqrt_vf2_vf2(dfadd2_vf2_vf_vf(x, vcast_vf_f(1))), dfsqrt_vf2_vf2(dfadd2_vf2_vf_vf(x, vcast_vf_f(-1)))), x)); vfloat y = vadd_vf_vf_vf(d.x, d.y); y = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vgt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(SQRT_FLT_MAX)), visnan_vo_vf(y)), vcast_vf_f(SLEEF_INFINITYf), y); y = vreinterpret_vf_vm(vandnot_vm_vo32_vm(veq_vo_vf_vf(x, vcast_vf_f(1.0f)), vreinterpret_vm_vf(y))); y = vreinterpret_vf_vm(vor_vm_vo32_vm(vlt_vo_vf_vf(x, vcast_vf_f(1.0f)), vreinterpret_vm_vf(y))); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; } EXPORT CONST vfloat xatanhf(vfloat x) { vfloat y = vabs_vf_vf(x); vfloat2 d = 
logk2f(dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(1), y), dfadd2_vf2_vf_vf(vcast_vf_f(1), vneg_vf_vf(y)))); y = vreinterpret_vf_vm(vor_vm_vo32_vm(vgt_vo_vf_vf(y, vcast_vf_f(1.0)), vreinterpret_vm_vf(vsel_vf_vo_vf_vf(veq_vo_vf_vf(y, vcast_vf_f(1.0)), vcast_vf_f(SLEEF_INFINITYf), vmul_vf_vf_vf(vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(0.5)))))); y = vreinterpret_vf_vm(vor_vm_vo32_vm(vor_vo_vo_vo(visinf_vo_vf(x), visnan_vo_vf(y)), vreinterpret_vm_vf(y))); y = vmulsign_vf_vf_vf(y, x); y = vreinterpret_vf_vm(vor_vm_vo32_vm(visnan_vo_vf(x), vreinterpret_vm_vf(y))); return y; } EXPORT CONST vfloat xexp2f(vfloat d) { vfloat u = vrint_vf_vf(d), s; vint2 q = vrint_vi2_vf(u); s = vsub_vf_vf_vf(d, u); u = vcast_vf_f(+0.1535920892e-3); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1339262701e-2)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.9618384764e-2)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5550347269e-1)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2402264476e+0)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.6931471825e+0)); #ifdef ENABLE_FMA_SP u = vfma_vf_vf_vf_vf(u, s, vcast_vf_f(1)); #else u = dfnormalize_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(u, s))).x; #endif u = vldexp2_vf_vf_vi2(u, q); u = vsel_vf_vo_vf_vf(vge_vo_vf_vf(d, vcast_vf_f(128)), vcast_vf_f(SLEEF_INFINITY), u); u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-150)), vreinterpret_vm_vf(u))); return u; } EXPORT CONST vfloat xexp10f(vfloat d) { vfloat u = vrint_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(LOG10_2))), s; vint2 q = vrint_vi2_vf(u); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Uf), d); s = vmla_vf_vf_vf_vf(u, vcast_vf_f(-L10Lf), s); u = vcast_vf_f(+0.2064004987e+0); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.5417877436e+0)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.1171286821e+1)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2034656048e+1)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2650948763e+1)); u = vmla_vf_vf_vf_vf(u, s, vcast_vf_f(+0.2302585125e+1)); #ifdef ENABLE_FMA_SP u = 
vfma_vf_vf_vf_vf(u, s, vcast_vf_f(1)); #else u = dfnormalize_vf2_vf2(dfadd_vf2_vf_vf2(vcast_vf_f(1), dfmul_vf2_vf_vf(u, s))).x; #endif u = vldexp2_vf_vf_vi2(u, q); u = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(38.5318394191036238941387f)), vcast_vf_f(SLEEF_INFINITYf), u); u = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vlt_vo_vf_vf(d, vcast_vf_f(-50)), vreinterpret_vm_vf(u))); return u; } EXPORT CONST vfloat xexpm1f(vfloat a) { vfloat2 d = dfadd2_vf2_vf2_vf(expk2f(vcast_vf2_vf_vf(a, vcast_vf_f(0))), vcast_vf_f(-1.0)); vfloat x = vadd_vf_vf_vf(d.x, d.y); x = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(a, vcast_vf_f(88.72283172607421875f)), vcast_vf_f(SLEEF_INFINITYf), x); x = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(a, vcast_vf_f(-16.635532333438687426013570f)), vcast_vf_f(-1), x); x = vsel_vf_vo_vf_vf(visnegzero_vo_vf(a), vcast_vf_f(-0.0f), x); return x; } EXPORT CONST vfloat xlog10f(vfloat d) { vfloat2 x; vfloat t, m, x2; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN)); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(1LL << 32) * (float)(1LL << 32))), d); vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75))); m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e)); e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); #else vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75))); e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); m = vgetmant_vf_vf(d); #endif x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m)); x2 = vmul_vf_vf_vf(x.x, x.x); t = vcast_vf_f(+0.1314289868e+0); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f( +0.1735493541e+0)); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f( +0.2895309627e+0)); #ifndef ENABLE_AVX512F vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.30103001, -1.432098889e-08), vcast_vf_vi2(e)); #else vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.30103001, -1.432098889e-08), e); #endif s = dfadd_vf2_vf2_vf2(s, dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(0.868588984, 
-2.170757285e-08))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, x.x), t)); vfloat r = vadd_vf_vf_vf(s.x, s.y); #ifndef ENABLE_AVX512F r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITY), r); r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), vcast_vf_f(SLEEF_NAN), r); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITY), r); #else r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); #endif return r; } EXPORT CONST vfloat xlog2f(vfloat d) { vfloat2 x; vfloat t, m, x2; #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vf_vf(d, vcast_vf_f(FLT_MIN)); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f((float)(1LL << 32) * (float)(1LL << 32))), d); vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75))); m = vldexp3_vf_vf_vi2(d, vneg_vi2_vi2(e)); e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); #else vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(d, vcast_vf_f(1.0/0.75))); e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); m = vgetmant_vf_vf(d); #endif x = dfdiv_vf2_vf2_vf2(dfadd2_vf2_vf_vf(vcast_vf_f(-1), m), dfadd2_vf2_vf_vf(vcast_vf_f(1), m)); x2 = vmul_vf_vf_vf(x.x, x.x); t = vcast_vf_f(+0.4374550283e+0f); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.5764790177e+0f)); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.9618012905120f)); #ifndef ENABLE_AVX512F vfloat2 s = dfadd2_vf2_vf_vf2(vcast_vf_vi2(e), dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(2.8853900432586669922, 3.2734474483568488616e-08))); #else vfloat2 s = dfadd2_vf2_vf_vf2(e, dfmul_vf2_vf2_vf2(x, vcast_vf2_f_f(2.8853900432586669922, 3.2734474483568488616e-08))); #endif s = dfadd2_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, x.x), t)); vfloat r = vadd_vf_vf_vf(s.x, s.y); #ifndef ENABLE_AVX512F r = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITY), r); r = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(d, vcast_vf_f(0)), visnan_vo_vf(d)), 
vcast_vf_f(SLEEF_NAN), r); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(-SLEEF_INFINITY), r); #else r = vfixup_vf_vf_vf_vi2_i(r, d, vcast_vi2_i((4 << (2*4)) | (3 << (4*4)) | (5 << (5*4)) | (2 << (6*4))), 0); #endif return r; } EXPORT CONST vfloat xlog1pf(vfloat d) { vfloat2 x; vfloat t, m, x2; vfloat dp1 = vadd_vf_vf_vf(d, vcast_vf_f(1)); #ifndef ENABLE_AVX512F vopmask o = vlt_vo_vf_vf(dp1, vcast_vf_f(FLT_MIN)); dp1 = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(dp1, vcast_vf_f((float)(1LL << 32) * (float)(1LL << 32))), dp1); vint2 e = vilogb2k_vi2_vf(vmul_vf_vf_vf(dp1, vcast_vf_f(1.0f/0.75f))); t = vldexp3_vf_vf_vi2(vcast_vf_f(1), vneg_vi2_vi2(e)); m = vmla_vf_vf_vf_vf(d, t, vsub_vf_vf_vf(t, vcast_vf_f(1))); e = vsel_vi2_vo_vi2_vi2(o, vsub_vi2_vi2_vi2(e, vcast_vi2_i(64)), e); vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), vcast_vf_vi2(e)); #else vfloat e = vgetexp_vf_vf(vmul_vf_vf_vf(dp1, vcast_vf_f(1.0f/0.75f))); e = vsel_vf_vo_vf_vf(vispinf_vo_vf(e), vcast_vf_f(128.0f), e); t = vldexp3_vf_vf_vi2(vcast_vf_f(1), vneg_vi2_vi2(vrint_vi2_vf(e))); m = vmla_vf_vf_vf_vf(d, t, vsub_vf_vf_vf(t, vcast_vf_f(1))); vfloat2 s = dfmul_vf2_vf2_vf(vcast_vf2_f_f(0.69314718246459960938f, -1.904654323148236017e-09f), e); #endif x = dfdiv_vf2_vf2_vf2(vcast_vf2_vf_vf(m, vcast_vf_f(0)), dfadd_vf2_vf_vf(vcast_vf_f(2), m)); x2 = vmul_vf_vf_vf(x.x, x.x); t = vcast_vf_f(+0.3027294874e+0f); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.3996108174e+0f)); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.6666694880e+0f)); s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, x.x), t)); vfloat r = vadd_vf_vf_vf(s.x, s.y); r = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(1e+38)), vcast_vf_f(SLEEF_INFINITYf), r); r = vreinterpret_vf_vm(vor_vm_vo32_vm(vgt_vo_vf_vf(vcast_vf_f(-1), d), vreinterpret_vm_vf(r))); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(-1)), 
vcast_vf_f(-SLEEF_INFINITYf), r);
  r = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), r);

  return r;
}

//

// Lane-wise absolute value; thin wrapper over vabs_vf_vf.
EXPORT CONST vfloat xfabsf(vfloat x) { return vabs_vf_vf(x); }

// Lane-wise copysign; thin wrapper over vcopysign_vf_vf_vf(x, y).
EXPORT CONST vfloat xcopysignf(vfloat x, vfloat y) { return vcopysign_vf_vf_vf(x, y); }

// Lane-wise maximum of x and y.
//
// Lanes where y is NaN yield x. On x86 builds that use neither the VECEXT nor
// the PUREC backend the maximum comes from vmax_vf_vf_vf; every other build
// selects with an explicit x > y comparison.
EXPORT CONST vfloat xfmaxf(vfloat x, vfloat y) {
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vmax_vf_vf_vf(x, y));
#else
  return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vsel_vf_vo_vf_vf(vgt_vo_vf_vf(x, y), x, y));
#endif
}

// Lane-wise minimum of x and y.
//
// Lanes where y is NaN yield x. On x86 builds that use neither the VECEXT nor
// the PUREC backend the minimum comes from vmin_vf_vf_vf; every other build
// selects with an explicit y > x comparison.
EXPORT CONST vfloat xfminf(vfloat x, vfloat y) {
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vmin_vf_vf_vf(x, y));
#else
  return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, vsel_vf_vo_vf_vf(vgt_vo_vf_vf(y, x), x, y));
#endif
}

// Lane-wise positive difference: x - y, replaced by 0 where the difference is
// negative or where x == y.
// NOTE(review): the explicit x == y test presumably covers equal infinities,
// whose difference would otherwise be NaN -- confirm.
EXPORT CONST vfloat xfdimf(vfloat x, vfloat y) {
  vfloat ret = vsub_vf_vf_vf(x, y);
  ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(ret, vcast_vf_f(0)), veq_vo_vf_vf(x, y)), vcast_vf_f(0), ret);
  return ret;
}

// Lane-wise round toward zero.
//
// fr is the fractional part, obtained by subtracting the integer-truncated
// value from x. Lanes that are infinite or have |x| >= 2^23 are returned
// unchanged; all others return x - fr with the sign of x re-applied.
EXPORT CONST vfloat xtruncf(vfloat x) {
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(1LL << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x));
}

// Lane-wise round toward negative infinity.
//
// A negative fractional part is shifted up by 1 so that x - fr lands on the
// integer at or below x. Lanes that are infinite or have |x| >= 2^23 are
// returned unchanged; the sign of x is re-applied to the result.
EXPORT CONST vfloat xfloorf(vfloat x) {
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  fr = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(fr, vcast_vf_f(0)), vadd_vf_vf_vf(fr, vcast_vf_f(1.0f)), fr);
  return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(1LL << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x));
}

// Lane-wise round toward positive infinity.
//
// A positive fractional part is shifted down by 1 so that x - fr lands on the
// integer at or above x. Lanes that are infinite or have |x| >= 2^23 are
// returned unchanged; the sign of x is re-applied to the result.
EXPORT CONST vfloat xceilf(vfloat x) {
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  fr = vsel_vf_vo_vf_vf(vle_vo_vf_vf(fr, vcast_vf_f(0)), fr, vsub_vf_vf_vf(fr, vcast_vf_f(1.0f)));
  return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(x), vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(1LL << 23))), x, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), x));
}

// Lane-wise rounding built on x = d + 0.5 followed by removal of the
// fractional part of x.
//
// Lanes that are infinite or have |d| >= 2^23 are returned unchanged; the sign
// of d is re-applied to the result.
EXPORT CONST vfloat xroundf(vfloat d) {
  vfloat x = vadd_vf_vf_vf(d, vcast_vf_f(0.5f));
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  // x is a non-positive integer: step one further down.
  x = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vle_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(fr, vcast_vf_f(0))), vsub_vf_vf_vf(x, vcast_vf_f(1.0f)), x);
  // Bring a negative fractional part into the non-negative range.
  fr = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(fr, vcast_vf_f(0)), vadd_vf_vf_vf(fr, vcast_vf_f(1.0f)), fr);
  // Special case for the largest float below 0.5.
  // NOTE(review): presumably d + 0.5f rounds up to 1.0f for this input, which
  // would give 1 instead of 0 -- confirm.
  x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0.4999999701976776123f)), vcast_vf_f(0), x);
  return vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(d), vge_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1LL << 23))), d, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), d));
}

// Lane-wise rounding built on x = d + 0.5 with an extra parity test on the
// truncated value of x.
// NOTE(review): the parity test looks like round-half-to-even -- confirm.
//
// Lanes that are infinite or have |d| >= 2^23 are returned unchanged; the sign
// of d is re-applied to the result.
EXPORT CONST vfloat xrintf(vfloat d) {
  vfloat x = vadd_vf_vf_vf(d, vcast_vf_f(0.5f));
  // True where the integer part of d + 0.5 is odd.
  vopmask isodd = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vcast_vi2_i(1), vtruncate_vi2_vf(x)), vcast_vi2_i(1));
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  // Add 1 to fr when it is negative, or when it is exactly 0 and the integer
  // part is odd (so the subtraction below steps back by one).
  fr = vsel_vf_vo_vf_vf(vor_vo_vo_vo(vlt_vo_vf_vf(fr, vcast_vf_f(0)), vand_vo_vo_vo(veq_vo_vf_vf(fr, vcast_vf_f(0)), isodd)), vadd_vf_vf_vf(fr, vcast_vf_f(1.0f)), fr);
  // Special case for the smallest float above 0.5.
  x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0.50000005960464477539f)), vcast_vf_f(0), x);
  vfloat ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visinf_vo_vf(d), vge_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(1LL << 23))), d, vcopysign_vf_vf_vf(vsub_vf_vf_vf(x, fr), d));
  return ret;
}

EXPORT CONST vfloat xfmaf(vfloat x, vfloat y, vfloat z) {
  vfloat h2 = vadd_vf_vf_vf(vmul_vf_vf_vf(x, y), z), q = vcast_vf_f(1);
  vopmask o = vlt_vo_vf_vf(vabs_vf_vf(h2), vcast_vf_f(1e-38f));
  {
    const float c0 = 1ULL << 25, c1 = c0 * c0, c2 = c1 * c1;
    x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, vcast_vf_f(c1)), x);
    y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(c1)), y);
    z = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(z, vcast_vf_f(c2)), z);
    q = vsel_vf_vo_vf_vf(o, vcast_vf_f(1.0f / c2), q);
  }
  o = vgt_vo_vf_vf(vabs_vf_vf(h2), vcast_vf_f(1e+38f));
  {
    const float c0 = 
1ULL << 25, c1 = c0 * c0, c2 = c1 * c1; x = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(x, vcast_vf_f(1.0f / c1)), x); y = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(y, vcast_vf_f(1.0f / c1)), y); z = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(z, vcast_vf_f(1.0f / c2)), z); q = vsel_vf_vo_vf_vf(o, vcast_vf_f(c2), q); } vfloat2 d = dfmul_vf2_vf_vf(x, y); d = dfadd2_vf2_vf2_vf(d, z); vfloat ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(y, vcast_vf_f(0))), z, vadd_vf_vf_vf(d.x, d.y)); o = visinf_vo_vf(z); o = vandnot_vo_vo_vo(visinf_vo_vf(x), o); o = vandnot_vo_vo_vo(visnan_vo_vf(x), o); o = vandnot_vo_vo_vo(visinf_vo_vf(y), o); o = vandnot_vo_vo_vo(visnan_vo_vf(y), o); h2 = vsel_vf_vo_vf_vf(o, z, h2); o = vor_vo_vo_vo(visinf_vo_vf(h2), visnan_vo_vf(h2)); return vsel_vf_vo_vf_vf(o, h2, vmul_vf_vf_vf(ret, q)); } static INLINE CONST vint2 vcast_vi2_i_i(int i0, int i1) { return vcast_vi2_vm(vcast_vm_i_i(i0, i1)); } SQRTFU05_FUNCATR vfloat xsqrtf_u05(vfloat d) { vfloat q; vopmask o; d = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(d, vcast_vf_f(0)), vcast_vf_f(SLEEF_NANf), d); o = vlt_vo_vf_vf(d, vcast_vf_f(5.2939559203393770e-23f)); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(1.8889465931478580e+22f)), d); q = vsel_vf_vo_vf_vf(o, vcast_vf_f(7.2759576141834260e-12f*0.5f), vcast_vf_f(0.5f)); o = vgt_vo_vf_vf(d, vcast_vf_f(1.8446744073709552e+19f)); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(5.4210108624275220e-20f)), d); q = vsel_vf_vo_vf_vf(o, vcast_vf_f(4294967296.0f * 0.5f), q); vfloat x = vreinterpret_vf_vi2(vsub_vi2_vi2_vi2(vcast_vi2_i(0x5f375a86), vsrl_vi2_vi2_i(vreinterpret_vi2_vf(vadd_vf_vf_vf(d, vcast_vf_f(1e-45f))), 1))); x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), x), x))); x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), x), x))); x = vmul_vf_vf_vf(x, vsub_vf_vf_vf(vcast_vf_f(1.5f), 
vmul_vf_vf_vf(vmul_vf_vf_vf(vmul_vf_vf_vf(vcast_vf_f(0.5f), d), x), x))); x = vmul_vf_vf_vf(x, d); vfloat2 d2 = dfmul_vf2_vf2_vf2(dfadd2_vf2_vf_vf2(d, dfmul_vf2_vf_vf(x, x)), dfrec_vf2_vf(x)); x = vmul_vf_vf_vf(vadd_vf_vf_vf(d2.x, d2.y), q); x = vsel_vf_vo_vf_vf(vispinf_vo_vf(d), vcast_vf_f(SLEEF_INFINITYf), x); x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(0)), d, x); return x; } EXPORT CONST vfloat xsqrtf(vfloat d) { #ifdef ACCURATE_SQRT return vsqrt_vf_vf(d); #endif // fall back to approximation if ACCURATE_SQRT is undefined return xsqrtf_u05(d); } EXPORT CONST vfloat xhypotf_u05(vfloat x, vfloat y) { x = vabs_vf_vf(x); y = vabs_vf_vf(y); vfloat min = vmin_vf_vf_vf(x, y), n = min; vfloat max = vmax_vf_vf_vf(x, y), d = max; vopmask o = vlt_vo_vf_vf(max, vcast_vf_f(FLT_MIN)); n = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(n, vcast_vf_f(1ULL << 24)), n); d = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(d, vcast_vf_f(1ULL << 24)), d); vfloat2 t = dfdiv_vf2_vf2_vf2(vcast_vf2_vf_vf(n, vcast_vf_f(0)), vcast_vf2_vf_vf(d, vcast_vf_f(0))); t = dfmul_vf2_vf2_vf(dfsqrt_vf2_vf2(dfadd2_vf2_vf2_vf(dfsqu_vf2_vf2(t), vcast_vf_f(1))), max); vfloat ret = vadd_vf_vf_vf(t.x, t.y); ret = vsel_vf_vo_vf_vf(visnan_vo_vf(ret), vcast_vf_f(SLEEF_INFINITYf), ret); ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(min, vcast_vf_f(0)), max, ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(SLEEF_INFINITYf)), veq_vo_vf_vf(y, vcast_vf_f(SLEEF_INFINITYf))), vcast_vf_f(SLEEF_INFINITYf), ret); return ret; } EXPORT CONST vfloat xhypotf_u35(vfloat x, vfloat y) { x = vabs_vf_vf(x); y = vabs_vf_vf(y); vfloat min = vmin_vf_vf_vf(x, y), n = min; vfloat max = vmax_vf_vf_vf(x, y), d = max; vfloat t = vdiv_vf_vf_vf(min, max); vfloat ret = vmul_vf_vf_vf(max, vsqrt_vf_vf(vmla_vf_vf_vf_vf(t, t, vcast_vf_f(1)))); ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(min, vcast_vf_f(0)), max, ret); ret = 
vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(SLEEF_INFINITYf)), veq_vo_vf_vf(y, vcast_vf_f(SLEEF_INFINITYf))), vcast_vf_f(SLEEF_INFINITYf), ret); return ret; } EXPORT CONST vfloat xnextafterf(vfloat x, vfloat y) { x = vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vmulsign_vf_vf_vf(vcast_vf_f(0), y), x); vint2 t, xi2 = vreinterpret_vi2_vf(x); vopmask c = vxor_vo_vo_vo(vsignbit_vo_vf(x), vge_vo_vf_vf(y, x)); xi2 = vsel_vi2_vo_vi2_vi2(c, vsub_vi2_vi2_vi2(vcast_vi2_i(0), vxor_vi2_vi2_vi2(xi2, vcast_vi2_i(1 << 31))), xi2); xi2 = vsel_vi2_vo_vi2_vi2(vneq_vo_vf_vf(x, y), vsub_vi2_vi2_vi2(xi2, vcast_vi2_i(1)), xi2); xi2 = vsel_vi2_vo_vi2_vi2(c, vsub_vi2_vi2_vi2(vcast_vi2_i(0), vxor_vi2_vi2_vi2(xi2, vcast_vi2_i(1 << 31))), xi2); vfloat ret = vreinterpret_vf_vi2(xi2); ret = vsel_vf_vo_vf_vf(vand_vo_vo_vo(veq_vo_vf_vf(ret, vcast_vf_f(0)), vneq_vo_vf_vf(x, vcast_vf_f(0))), vmulsign_vf_vf_vf(vcast_vf_f(0), x), ret); ret = vsel_vf_vo_vf_vf(vand_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), veq_vo_vf_vf(y, vcast_vf_f(0))), y, ret); ret = vsel_vf_vo_vf_vf(vor_vo_vo_vo(visnan_vo_vf(x), visnan_vo_vf(y)), vcast_vf_f(SLEEF_NANf), ret); return ret; } EXPORT CONST vfloat xfrfrexpf(vfloat x) { x = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(FLT_MIN)), vmul_vf_vf_vf(x, vcast_vf_f(1ULL << 30)), x); vmask xm = vreinterpret_vm_vf(x); xm = vand_vm_vm_vm(xm, vcast_vm_i_i(~0x7f800000U, ~0x7f800000U)); xm = vor_vm_vm_vm (xm, vcast_vm_i_i( 0x3f000000U, 0x3f000000U)); vfloat ret = vreinterpret_vf_vm(xm); ret = vsel_vf_vo_vf_vf(visinf_vo_vf(x), vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), x), ret); ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), x, ret); return ret; } EXPORT CONST vint2 xexpfrexpf(vfloat x) { /* x = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(FLT_MIN)), vmul_vf_vf_vf(x, vcast_vf_f(1ULL << 63)), x); vint ret = 
vcastu_vi_vi2(vreinterpret_vi2_vf(x));
  ret = vsub_vi_vi_vi(vand_vi_vi_vi(vsrl_vi_vi_i(ret, 20), vcast_vi_i(0x7ff)), vcast_vi_i(0x3fe));

  ret = vsel_vi_vo_vi_vi(vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(x, vcast_vf_f(0)), visnan_vo_vf(x)), visinf_vo_vf(x)), vcast_vi_i(0), ret);

  return ret;
  */
  return vcast_vi2_i(0);
}

/*
 * Returns x with its bit pattern decremented by one, i.e. the neighbouring
 * float of smaller magnitude; an input equal to zero yields 0.
 */
static INLINE CONST vfloat vtoward0f(vfloat x) {
  vfloat t = vreinterpret_vf_vi2(vsub_vi2_vi2_vi2(vreinterpret_vi2_vf(x), vcast_vi2_i(1)));
  return vsel_vf_vo_vf_vf(veq_vo_vf_vf(x, vcast_vf_f(0)), vcast_vf_f(0), t);
}

/*
 * Truncates x toward zero.  With FULL_FP_ROUNDING the dedicated
 * vtruncate_vf_vf is used; otherwise the fractional part is obtained through
 * a float -> int -> float round trip and subtracted, and inputs with
 * |x| >= 2^23 are returned unchanged.
 */
static INLINE CONST vfloat vptruncf(vfloat x) {
#ifdef FULL_FP_ROUNDING
  return vtruncate_vf_vf(x);
#else
  vfloat fr = vsub_vf_vf_vf(x, vcast_vf_vi2(vtruncate_vi2_vf(x)));
  return vsel_vf_vo_vf_vf(vge_vo_vf_vf(vabs_vf_vf(x), vcast_vf_f(1LL << 23)), x, vsub_vf_vf_vf(x, fr));
#endif
}

/*
 * Vector fmodf.  Works on nu = |x| and de = |y|, repeatedly subtracting
 * trunc(q) * de from a double-float remainder r, and re-applies the sign of x
 * at the end.
 */
EXPORT CONST vfloat xfmodf(vfloat x, vfloat y) {
  vfloat nu = vabs_vf_vf(x), de = vabs_vf_vf(y), s = vcast_vf_f(1), q;
  /* When de is below FLT_MIN, scale both operands by 2^25 and remember the
     inverse factor in s so the result can be scaled back. */
  vopmask o = vlt_vo_vf_vf(de, vcast_vf_f(FLT_MIN));
  nu = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(nu, vcast_vf_f(1ULL << 25)), nu);
  de = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(de, vcast_vf_f(1ULL << 25)), de);
  s  = vsel_vf_vo_vf_vf(o, vmul_vf_vf_vf(s , vcast_vf_f(1.0f / (1ULL << 25))), s);
  /* Reciprocal of de, nudged toward zero (twice on NEON32).
     NOTE(review): presumably so the quotient estimate never overshoots - confirm. */
  vfloat rde = vtoward0f(vrec_vf_vf(de));
#ifdef ENABLE_NEON32
  rde = vtoward0f(rde);
#endif
  vfloat2 r = vcast_vf2_vf_vf(nu, vcast_vf_f(0));

  for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1
    /* q is 1 when de <= r.x < 2*de, otherwise the estimate toward0(r.x) * rde. */
    q = vsel_vf_vo_vf_vf(vand_vo_vo_vo(vgt_vo_vf_vf(vadd_vf_vf_vf(de, de), r.x),
                                       vge_vo_vf_vf(r.x, de)),
                         vcast_vf_f(1), vmul_vf_vf_vf(vtoward0f(r.x), rde));
    r = dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf2(r, dfmul_vf2_vf_vf(vptruncf(q), vneg_vf_vf(de))));
    /* Stop early once every lane has a remainder below de. */
    if (vtestallones_i_vo32(vlt_vo_vf_vf(r.x, de))) break;
  }

  vfloat ret = vmul_vf_vf_vf(vadd_vf_vf_vf(r.x, r.y), s);
  /* A remainder that sums exactly to de is reported as 0. */
  ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(vadd_vf_vf_vf(r.x, r.y), de), vcast_vf_f(0), ret);

  ret = vmulsign_vf_vf_vf(ret, x);

  /* |x| < |y|: the result is x itself. */
  ret = vsel_vf_vo_vf_vf(vlt_vo_vf_vf(nu, de), x, ret);
  ret = vsel_vf_vo_vf_vf(veq_vo_vf_vf(de, vcast_vf_f(0)),
vcast_vf_f(SLEEF_NANf), ret); return ret; } // static INLINE CONST vfloat2 sinpifk(vfloat d) { vopmask o; vfloat u, s, t; vfloat2 x, s2; u = vmul_vf_vf_vf(d, vcast_vf_f(4.0)); vint2 q = vtruncate_vi2_vf(u); q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)); s = vsub_vf_vf_vf(u, vcast_vf_vi2(q)); t = s; s = vmul_vf_vf_vf(s, s); s2 = dfmul_vf2_vf_vf(t, t); // u = vsel_vf_vo_f_f(o, -0.2430611801e-7f, +0.3093842054e-6f); u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, +0.3590577080e-5f, -0.3657307388e-4f)); u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, -0.3259917721e-3f, +0.2490393585e-2f)); x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), vsel_vf2_vo_f_f_f_f(o, 0.015854343771934509277, 4.4940051354032242811e-10, -0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), vsel_vf2_vo_f_f_f_f(o, -0.30842512845993041992, -9.0728339030733922277e-09, 0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_vf2_vf2_vf2(x, vsel_vf2_vo_vf2_vf2(o, s2, vcast_vf2_vf_vf(t, vcast_vf_f(0)))); x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(4)), vcast_vi2_i(4)); x.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(x.x))); x.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(x.y))); return x; } EXPORT CONST vfloat xsinpif_u05(vfloat d) { vfloat2 x = sinpifk(d); vfloat r = vadd_vf_vf_vf(x.x, x.y); r = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0), r); r = vreinterpret_vf_vm(vandnot_vm_vo32_vm(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX4f)), vreinterpret_vm_vf(r))); r = vreinterpret_vf_vm(vor_vm_vo32_vm(visinf_vo_vf(d), vreinterpret_vm_vf(r))); return r; } static INLINE CONST vfloat2 cospifk(vfloat 
d) { vopmask o; vfloat u, s, t; vfloat2 x, s2; u = vmul_vf_vf_vf(d, vcast_vf_f(4.0)); vint2 q = vtruncate_vi2_vf(u); q = vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vxor_vi2_vi2_vi2(vsrl_vi2_vi2_i(q, 31), vcast_vi2_i(1))), vcast_vi2_i(~1)); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(0)); s = vsub_vf_vf_vf(u, vcast_vf_vi2(q)); t = s; s = vmul_vf_vf_vf(s, s); s2 = dfmul_vf2_vf_vf(t, t); // u = vsel_vf_vo_f_f(o, -0.2430611801e-7f, +0.3093842054e-6f); u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, +0.3590577080e-5f, -0.3657307388e-4f)); u = vmla_vf_vf_vf_vf(u, s, vsel_vf_vo_f_f(o, -0.3259917721e-3f, +0.2490393585e-2f)); x = dfadd2_vf2_vf_vf2(vmul_vf_vf_vf(u, s), vsel_vf2_vo_f_f_f_f(o, 0.015854343771934509277, 4.4940051354032242811e-10, -0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_vf2_vf2_vf2(dfmul_vf2_vf2_vf2(s2, x), vsel_vf2_vo_f_f_f_f(o, -0.30842512845993041992, -9.0728339030733922277e-09, 0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_vf2_vf2_vf2(x, vsel_vf2_vo_vf2_vf2(o, s2, vcast_vf2_vf_vf(t, vcast_vf_f(0)))); x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x); o = veq_vo_vi2_vi2(vand_vi2_vi2_vi2(vadd_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(4)), vcast_vi2_i(4)); x.x = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(x.x))); x.y = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(o, vreinterpret_vm_vf(vcast_vf_f(-0.0))), vreinterpret_vm_vf(x.y))); return x; } EXPORT CONST vfloat xcospif_u05(vfloat d) { vfloat2 x = cospifk(d); vfloat r = vadd_vf_vf_vf(x.x, x.y); r = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX4f)), vcast_vf_f(1), r); r = vreinterpret_vf_vm(vor_vm_vo32_vm(visinf_vo_vf(d), vreinterpret_vm_vf(r))); return r; } #ifdef ENABLE_SVE typedef __sizeless_struct { vfloat2 a, b; } df2; #else typedef struct { vfloat2 a, b; } df2; #endif /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ 
static CONST df2 gammafk(vfloat a) { vfloat2 clc = vcast_vf2_f_f(0, 0), clln = vcast_vf2_f_f(1, 0), clld = vcast_vf2_f_f(1, 0); vfloat2 v = vcast_vf2_f_f(1, 0), x, y, z; vfloat t, u; vopmask otiny = vlt_vo_vf_vf(vabs_vf_vf(a), vcast_vf_f(1e-30f)), oref = vlt_vo_vf_vf(a, vcast_vf_f(0.5)); x = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_f_f(0, 0), vsel_vf2_vo_vf2_vf2(oref, dfadd2_vf2_vf_vf(vcast_vf_f(1), vneg_vf_vf(a)), vcast_vf2_vf_vf(a, vcast_vf_f(0)))); vopmask o0 = vand_vo_vo_vo(vle_vo_vf_vf(vcast_vf_f(0.5), x.x), vle_vo_vf_vf(x.x, vcast_vf_f(1.2))); vopmask o2 = vle_vo_vf_vf(vcast_vf_f(2.3), x.x); y = dfnormalize_vf2_vf2(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(1)), x)); y = dfnormalize_vf2_vf2(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(2)), y)); vopmask o = vand_vo_vo_vo(o2, vle_vo_vf_vf(x.x, vcast_vf_f(7))); clln = vsel_vf2_vo_vf2_vf2(o, y, clln); x = vsel_vf2_vo_vf2_vf2(o, dfadd2_vf2_vf2_vf(x, vcast_vf_f(3)), x); t = vsel_vf_vo_vf_vf(o2, vrec_vf_vf(x.x), dfnormalize_vf2_vf2(dfadd2_vf2_vf2_vf(x, vsel_vf_vo_f_f(o0, -1, -2))).x); u = vsel_vf_vo_vo_f_f_f(o2, o0, +0.000839498720672087279971000786, +0.9435157776e+0f, +0.1102489550e-3f); u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -5.17179090826059219329394422e-05, +0.8670063615e+0f, +0.8160019934e-4f)); u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.000592166437353693882857342347, +0.4826702476e+0f, +0.1528468856e-3f)); u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +6.97281375836585777403743539e-05, -0.8855129778e-1f, -0.2355068718e-3f)); u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +0.000784039221720066627493314301, +0.1013825238e+0f, +0.4962242092e-3f)); u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.000229472093621399176949318732, -0.1493408978e+0f, -0.1193488017e-2f)); u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, -0.002681327160493827160473958490, +0.1697509140e+0f, +0.2891599433e-2f)); u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, 
+0.003472222222222222222175164840, -0.2072454542e+0f, -0.7385451812e-2f)); u = vmla_vf_vf_vf_vf(u, t, vsel_vf_vo_vo_f_f_f(o2, o0, +0.083333333333333333335592087900, +0.2705872357e+0f, +0.2058077045e-1f)); y = dfmul_vf2_vf2_vf2(dfadd2_vf2_vf2_vf(x, vcast_vf_f(-0.5)), logk2f(x)); y = dfadd2_vf2_vf2_vf2(y, dfneg_vf2_vf2(x)); y = dfadd2_vf2_vf2_vf2(y, vcast_vf2_d(0.91893853320467278056)); // 0.5*log(2*M_PI) z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf (u, t), vsel_vf_vo_f_f(o0, -0.400686534596170958447352690395e+0f, -0.673523028297382446749257758235e-1f)); z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(z, t), vsel_vf_vo_f_f(o0, +0.822466960142643054450325495997e+0f, +0.322467033928981157743538726901e+0f)); z = dfadd2_vf2_vf2_vf(dfmul_vf2_vf2_vf(z, t), vsel_vf_vo_f_f(o0, -0.577215665946766039837398973297e+0f, +0.422784335087484338986941629852e+0f)); z = dfmul_vf2_vf2_vf(z, t); clc = vsel_vf2_vo_vf2_vf2(o2, y, z); clld = vsel_vf2_vo_vf2_vf2(o2, dfadd2_vf2_vf2_vf(dfmul_vf2_vf_vf(u, t), vcast_vf_f(1)), clld); y = clln; clc = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_d(41.58883083359671856503), // log(2^60) vsel_vf2_vo_vf2_vf2(oref, dfadd2_vf2_vf2_vf2(vcast_vf2_d(1.1447298858494001639), dfneg_vf2_vf2(clc)), clc)); // log(M_PI) clln = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_f_f(1, 0), vsel_vf2_vo_vf2_vf2(oref, clln, clld)); if (!vtestallones_i_vo32(vnot_vo32_vo32(oref))) { t = vsub_vf_vf_vf(a, vmul_vf_vf_vf(vcast_vf_f(1LL << 12), vcast_vf_vi2(vtruncate_vi2_vf(vmul_vf_vf_vf(a, vcast_vf_f(1.0 / (1LL << 12))))))); x = dfmul_vf2_vf2_vf2(clld, sinpifk(t)); } clld = vsel_vf2_vo_vf2_vf2(otiny, vcast_vf2_vf_vf(vmul_vf_vf_vf(a, vcast_vf_f((1LL << 30)*(float)(1LL << 30))), vcast_vf_f(0)), vsel_vf2_vo_vf2_vf2(oref, x, y)); df2 ret = { clc, dfdiv_vf2_vf2_vf2(clln, clld) }; return ret; } EXPORT CONST vfloat xtgammaf_u1(vfloat a) { df2 d = gammafk(a); vfloat2 y = dfmul_vf2_vf2_vf2(expk2f(d.a), d.b); vfloat r = vadd_vf_vf_vf(y.x, y.y); vopmask o; o = vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, 
vcast_vf_f(-SLEEF_INFINITYf)), vand_vo_vo_vo(vlt_vo_vf_vf(a, vcast_vf_f(0)), visint_vo_vf(a))), vand_vo_vo_vo(vand_vo_vo_vo(visnumber_vo_vf(a), vlt_vo_vf_vf(a, vcast_vf_f(0))), visnan_vo_vf(r))); r = vsel_vf_vo_vf_vf(o, vcast_vf_f(SLEEF_NANf), r); o = vand_vo_vo_vo(vand_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, vcast_vf_f(SLEEF_INFINITYf)), visnumber_vo_vf(a)), vge_vo_vf_vf(a, vcast_vf_f(-FLT_MIN))), vor_vo_vo_vo(vor_vo_vo_vo(veq_vo_vf_vf(a, vcast_vf_f(0)), vgt_vo_vf_vf(a, vcast_vf_f(36))), visnan_vo_vf(r))); r = vsel_vf_vo_vf_vf(o, vmulsign_vf_vf_vf(vcast_vf_f(SLEEF_INFINITYf), a), r); return r; } EXPORT CONST vfloat xlgammaf_u1(vfloat a) { df2 d = gammafk(a); vfloat2 y = dfadd2_vf2_vf2_vf2(d.a, logk2f(dfabs_vf2_vf2(d.b))); vfloat r = vadd_vf_vf_vf(y.x, y.y); vopmask o; o = vor_vo_vo_vo(visinf_vo_vf(a), vor_vo_vo_vo(vand_vo_vo_vo(vle_vo_vf_vf(a, vcast_vf_f(0)), visint_vo_vf(a)), vand_vo_vo_vo(visnumber_vo_vf(a), visnan_vo_vf(r)))); r = vsel_vf_vo_vf_vf(o, vcast_vf_f(SLEEF_INFINITYf), r); return r; } /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ EXPORT CONST vfloat xerff_u1(vfloat a) { vfloat s = a, t, u; vfloat2 d; a = vabs_vf_vf(a); vopmask o0 = vlt_vo_vf_vf(a, vcast_vf_f(1.1)); vopmask o1 = vlt_vo_vf_vf(a, vcast_vf_f(2.4)); vopmask o2 = vlt_vo_vf_vf(a, vcast_vf_f(4.0)); u = vsel_vf_vo_vf_vf(o0, vmul_vf_vf_vf(a, a), a); t = vsel_vf_vo_vo_f_f_f(o0, o1, +0.7089292194e-4f, -0.1792667899e-4f, -0.9495757695e-5f); t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, -0.7768311189e-3f, +0.3937633010e-3f, +0.2481465926e-3f)); t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, +0.5159463733e-2f, -0.3949181177e-2f, -0.2918176819e-2f)); t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, -0.2683781274e-1f, +0.2445474640e-1f, +0.2059706673e-1f)); t = vmla_vf_vf_vf_vf(t, u, vsel_vf_vo_vo_f_f_f(o0, o1, +0.1128318012e+0f, -0.1070996150e+0f, -0.9901899844e-1f)); d = dfmul_vf2_vf_vf(t, u); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_d_d_d(o0, o1, 
-0.376125876000657465175213237214e+0, -0.634588905908410389971210809210e+0, -0.643598050547891613081201721633e+0)); d = dfmul_vf2_vf2_vf(d, u); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_d_d_d(o0, o1, +0.112837916021059138255978217023e+1, -0.112879855826694507209862753992e+1, -0.112461487742845562801052956293e+1)); d = dfmul_vf2_vf2_vf(d, a); d = vsel_vf2_vo_vf2_vf2(o0, d, dfadd_vf2_vf_vf2(vcast_vf_f(1.0), dfneg_vf2_vf2(expk2f(d)))); u = vmulsign_vf_vf_vf(vsel_vf_vo_vf_vf(o2, vadd_vf_vf_vf(d.x, d.y), vcast_vf_f(1)), s); u = vsel_vf_vo_vf_vf(visnan_vo_vf(a), vcast_vf_f(SLEEF_NANf), u); return u; } /* TODO AArch64: potential optimization by using `vfmad_lane_f64` */ EXPORT CONST vfloat xerfcf_u15(vfloat a) { vfloat s = a, r = vcast_vf_f(0), t; vfloat2 u, d, x; a = vabs_vf_vf(a); vopmask o0 = vlt_vo_vf_vf(a, vcast_vf_f(1.0)); vopmask o1 = vlt_vo_vf_vf(a, vcast_vf_f(2.2)); vopmask o2 = vlt_vo_vf_vf(a, vcast_vf_f(4.3)); vopmask o3 = vlt_vo_vf_vf(a, vcast_vf_f(10.1)); u = vsel_vf2_vo_vf2_vf2(o1, vcast_vf2_vf_vf(a, vcast_vf_f(0)), dfdiv_vf2_vf2_vf2(vcast_vf2_f_f(1, 0), vcast_vf2_vf_vf(a, vcast_vf_f(0)))); t = vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, -0.8638041618e-4f, -0.6236977242e-5f, -0.3869504035e+0f, +0.1115344167e+1f); t = vmla_vf_vf_vf_vf(t, u.x, vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.6000166177e-3f, +0.5749821503e-4f, +0.1288077235e+1f, -0.9454904199e+0f)); t = vmla_vf_vf_vf_vf(t, u.x, vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, -0.1665703603e-2f, +0.6002851478e-5f, -0.1816803217e+1f, -0.3667259514e+0f)); t = vmla_vf_vf_vf_vf(t, u.x, vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.1795156277e-3f, -0.2851036377e-2f, +0.1249150872e+1f, +0.7155663371e+0f)); t = vmla_vf_vf_vf_vf(t, u.x, vsel_vf_vo_vo_vo_f_f_f_f(o0, o1, o2, +0.1914106123e-1f, +0.2260518074e-1f, -0.1328857988e+0f, -0.1262947265e-1f)); d = dfmul_vf2_vf2_vf(u, t); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.102775359343930288081655368891e+0, -0.105247583459338632253369014063e+0, 
-0.482365310333045318680618892669e+0, -0.498961546254537647970305302739e+0)); d = dfmul_vf2_vf2_vf2(d, u); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.636619483208481931303752546439e+0, -0.635609463574589034216723775292e+0, -0.134450203224533979217859332703e-2, -0.471199543422848492080722832666e-4)); d = dfmul_vf2_vf2_vf2(d, u); d = dfadd2_vf2_vf2_vf2(d, vsel_vf2_vo_vo_vo_d_d_d_d(o0, o1, o2, -0.112837917790537404939545770596e+1, -0.112855987376668622084547028949e+1, -0.572319781150472949561786101080e+0, -0.572364030327966044425932623525e+0)); x = dfmul_vf2_vf2_vf(vsel_vf2_vo_vf2_vf2(o1, d, vcast_vf2_vf_vf(vneg_vf_vf(a), vcast_vf_f(0))), a); x = vsel_vf2_vo_vf2_vf2(o1, x, dfadd2_vf2_vf2_vf2(x, d)); x = expk2f(x); x = vsel_vf2_vo_vf2_vf2(o1, x, dfmul_vf2_vf2_vf2(x, u)); r = vsel_vf_vo_vf_vf(o3, vadd_vf_vf_vf(x.x, x.y), vcast_vf_f(0)); r = vsel_vf_vo_vf_vf(vsignbit_vo_vf(s), vsub_vf_vf_vf(vcast_vf_f(2), r), r); r = vsel_vf_vo_vf_vf(visnan_vo_vf(s), vcast_vf_f(SLEEF_NANf), r); return r; } #ifndef ENABLE_GNUABI EXPORT CONST int xgetIntf(int name) { if (1 <= name && name <= 10) return vavailability_i(name); return 0; } EXPORT CONST void *xgetPtrf(int name) { if (name == 0) return ISANAME; return (void *)0; } #endif #ifdef ALIAS_NO_EXT_SUFFIX #include ALIAS_NO_EXT_SUFFIX #endif #ifdef ENABLE_GNUABI EXPORT CONST vfloat __acosf_finite (vfloat) __attribute__((weak, alias(str_xacosf_u1 ))); EXPORT CONST vfloat __acoshf_finite (vfloat) __attribute__((weak, alias(str_xacoshf ))); EXPORT CONST vfloat __asinf_finite (double) __attribute__((weak, alias(str_xasinf_u1 ))); EXPORT CONST vfloat __atan2f_finite (vfloat, vfloat) __attribute__((weak, alias(str_xatan2f_u1 ))); EXPORT CONST vfloat __atanhf_finite (vfloat) __attribute__((weak, alias(str_xatanhf ))); EXPORT CONST vfloat __coshf_finite (vfloat) __attribute__((weak, alias(str_xcoshf ))); EXPORT CONST vfloat __exp10f_finite (vfloat) __attribute__((weak, alias(str_xexp10f ))); EXPORT CONST vfloat 
__exp2f_finite (vfloat) __attribute__((weak, alias(str_xexp2f ))); EXPORT CONST vfloat __expf_finite (vfloat) __attribute__((weak, alias(str_xexpf ))); EXPORT CONST vfloat __fmodf_finite (vfloat, vfloat) __attribute__((weak, alias(str_xfmodf ))); EXPORT CONST vfloat __modff_finite (vfloat, vfloat *) __attribute__((weak, alias(str_xmodff ))); EXPORT CONST vfloat __hypotf_u05_finite(vfloat, vfloat) __attribute__((weak, alias(str_xhypotf_u05))); EXPORT CONST vfloat __lgammaf_u1_finite(vfloat) __attribute__((weak, alias(str_xlgammaf_u1))); EXPORT CONST vfloat __log10f_finite (vfloat) __attribute__((weak, alias(str_xlog10f ))); EXPORT CONST vfloat __logf_finite (vfloat) __attribute__((weak, alias(str_xlogf_u1 ))); EXPORT CONST vfloat __powf_finite (vfloat, vfloat) __attribute__((weak, alias(str_xpowf ))); EXPORT CONST vfloat __sinhf_finite (vfloat) __attribute__((weak, alias(str_xsinhf ))); EXPORT CONST vfloat __sqrtf_finite (vfloat) __attribute__((weak, alias(str_xsqrtf ))); EXPORT CONST vfloat __tgammaf_u1_finite(vfloat) __attribute__((weak, alias(str_xtgammaf_u1))); #ifdef HEADER_MASKED #include HEADER_MASKED #endif #endif /* #ifdef ENABLE_GNUABI */ #ifdef ENABLE_MAIN // gcc -DENABLE_MAIN -Wno-attributes -I../common -I../arch -DENABLE_AVX2 -mavx2 -mfma sleefsimdsp.c ../common/common.c -lm #include #include int main(int argc, char **argv) { vfloat vf1 = vcast_vf_f(atof(argv[1])); //vfloat vf2 = vcast_vf_f(atof(argv[2])); //vfloat r = xpowf(vf1, vf2); //vfloat r = xsqrtf_u05(vf1); //printf("%g\n", xnextafterf(vf1, vf2)[0]); //printf("%g\n", nextafterf(atof(argv[1]), atof(argv[2]))); printf("t = %.20g\n", xlogf_u1(vf1)[0]); printf("c = %.20g\n", logf(atof(argv[1]))); } #endif sleef-3.3.1/src/libm/sleefsp.c000066400000000000000000001624741333715643700161650ustar00rootroot00000000000000// Copyright Naoki Shibata 2010 - 2018. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) // Always use -ffp-contract=off option to compile SLEEF. #include #include #include #include #include #ifndef ENABLE_BUILTIN_MATH #include #define SQRTF sqrtf #else #define SQRTF __builtin_sqrtf #endif #include "misc.h" extern const float rempitabsp[]; #ifdef DORENAME #include "rename.h" #endif #if (defined(_MSC_VER)) #pragma fp_contract (off) #endif static INLINE CONST int32_t floatToRawIntBits(float d) { union { float f; int32_t i; } tmp; tmp.f = d; return tmp.i; } static INLINE CONST float intBitsToFloat(int32_t i) { union { float f; int32_t i; } tmp; tmp.i = i; return tmp.f; } static INLINE CONST float fabsfk(float x) { return intBitsToFloat(0x7fffffffL & floatToRawIntBits(x)); } static INLINE CONST float mulsignf(float x, float y) { return intBitsToFloat(floatToRawIntBits(x) ^ (floatToRawIntBits(y) & (1 << 31))); } static INLINE CONST float copysignfk(float x, float y) { return intBitsToFloat((floatToRawIntBits(x) & ~(1 << 31)) ^ (floatToRawIntBits(y) & (1 << 31))); } static INLINE CONST float signf(float d) { return mulsignf(1, d); } static INLINE CONST float mlaf(float x, float y, float z) { return x * y + z; } static INLINE CONST float rintfk(float x) { return x < 0 ? (int)(x - 0.5f) : (int)(x + 0.5f); } static INLINE CONST int ceilfk(float x) { return (int)x + (x < 0 ? 0 : 1); } static INLINE CONST float fminfk(float x, float y) { return x < y ? x : y; } static INLINE CONST float fmaxfk(float x, float y) { return x > y ? 
x : y; } static INLINE CONST int xisintf(float x) { return (x == (int)x); } static INLINE CONST int xisnanf(float x) { return x != x; } static INLINE CONST int xisinff(float x) { return x == SLEEF_INFINITYf || x == -SLEEF_INFINITYf; } static INLINE CONST int xisminff(float x) { return x == -SLEEF_INFINITYf; } static INLINE CONST int xispinff(float x) { return x == SLEEF_INFINITYf; } static INLINE CONST int xisnegzerof(float x) { return floatToRawIntBits(x) == floatToRawIntBits(-0.0); } static INLINE CONST int xisnumberf(float x) { return !xisinff(x) && !xisnanf(x); } static INLINE CONST int ilogbkf(float d) { int m = d < 5.421010862427522E-20f; d = m ? 1.8446744073709552E19f * d : d; int q = (floatToRawIntBits(d) >> 23) & 0xff; q = m ? q - (64 + 0x7f) : q - 0x7f; return q; } // vilogb2kf is similar to ilogbkf, but the argument has to be a // normalized FP value. static INLINE CONST int ilogb2kf(float d) { return ((floatToRawIntBits(d) >> 23) & 0xff) - 0x7f; } EXPORT CONST int xilogbf(float d) { int e = ilogbkf(fabsfk(d)); e = d == 0.0f ? SLEEF_FP_ILOGB0 : e; e = xisnanf(d) ? SLEEF_FP_ILOGBNAN : e; e = xisinff(d) ? INT_MAX : e; return e; } static INLINE CONST float pow2if(int q) { return intBitsToFloat(((int32_t)(q + 0x7f)) << 23); } static INLINE CONST float ldexpkf(float x, int q) { float u; int m; m = q >> 31; m = (((m + q) >> 6) - m) << 4; q = q - (m << 2); m += 127; m = m < 0 ? 0 : m; m = m > 255 ? 
255 : m; u = intBitsToFloat(((int32_t)m) << 23); x = x * u * u * u * u; u = intBitsToFloat(((int32_t)(q + 0x7f)) << 23); return x * u; } static INLINE CONST float ldexp2kf(float d, int e) { // faster than ldexpkf, short reach return d * pow2if(e >> 1) * pow2if(e - (e >> 1)); } static INLINE CONST float ldexp3kf(float d, int e) { // very fast, no denormal return intBitsToFloat(floatToRawIntBits(d) + (e << 23)); } // #ifndef NDEBUG static int checkfp(float x) { if (xisinff(x) || xisnanf(x)) return 1; return 0; } #endif static INLINE CONST float upperf(float d) { return intBitsToFloat(floatToRawIntBits(d) & 0xfffff000); } static INLINE CONST Sleef_float2 df(float h, float l) { Sleef_float2 ret; ret.x = h; ret.y = l; return ret; } static INLINE CONST Sleef_float2 dfx(double d) { Sleef_float2 ret; ret.x = d; ret.y = d - ret.x; return ret; } static INLINE CONST Sleef_float2 dfnormalize_f2_f2(Sleef_float2 t) { Sleef_float2 s; s.x = t.x + t.y; s.y = t.x - s.x + t.y; return s; } static INLINE CONST Sleef_float2 dfscale_f2_f2_f(Sleef_float2 d, float s) { Sleef_float2 r; r.x = d.x * s; r.y = d.y * s; return r; } static INLINE CONST Sleef_float2 dfneg_f2_f2(Sleef_float2 d) { Sleef_float2 r; r.x = -d.x; r.y = -d.y; return r; } static INLINE CONST Sleef_float2 dfabs_f2_f2(Sleef_float2 x) { return df(x.x < 0 ? -x.x : x.x, x.x < 0 ? 
-x.y : x.y); }

// Sum of two floats as a Sleef_float2: r.x is the rounded sum, r.y the part
// of the sum that r.x did not absorb. Debug builds print a diagnostic to
// stderr when neither argument is inf/NaN and |x| < |y|.
static INLINE CONST Sleef_float2 dfadd_f2_f_f(float x, float y) {
  // |x| >= |y|

  Sleef_float2 r;

#ifndef NDEBUG
  if (!(checkfp(x) || checkfp(y) || fabsfk(x) >= fabsfk(y))) fprintf(stderr, "[dfadd_f2_f_f : %g, %g]", x, y);
#endif

  r.x = x + y;
  r.y = x - r.x + y;

  return r;
}

// Sum of two floats as a Sleef_float2. Unlike dfadd_f2_f_f there is no
// ordering check; the error term is rebuilt from both operands through v.
static INLINE CONST Sleef_float2 dfadd2_f2_f_f(float x, float y) {
  Sleef_float2 r;

  r.x = x + y;
  float v = r.x - x;
  r.y = (x - (r.x - v)) + (y - v);

  return r;
}

// Sleef_float2 + float. Debug builds print a diagnostic when neither high
// word is inf/NaN and |x.x| < |y|.
static INLINE CONST Sleef_float2 dfadd_f2_f2_f(Sleef_float2 x, float y) {
  // |x| >= |y|

  Sleef_float2 r;

#ifndef NDEBUG
  if (!(checkfp(x.x) || checkfp(y) || fabsfk(x.x) >= fabsfk(y))) fprintf(stderr, "[dfadd_f2_f2_f : %g %g]", x.x, y);
#endif

  r.x = x.x + y;
  r.y = x.x - r.x + y + x.y;

  return r;
}

// float + Sleef_float2. Debug builds print a diagnostic (and flush stderr)
// when neither high word is inf/NaN and |x| < |y.x|.
static INLINE CONST Sleef_float2 dfadd_f2_f_f2(float x, Sleef_float2 y) {
  // |x| >= |y|

  Sleef_float2 r;

#ifndef NDEBUG
  if (!(checkfp(x) || checkfp(y.x) || fabsfk(x) >= fabsfk(y.x))) {
    fprintf(stderr, "[dfadd_f2_f_f2 : %g %g]\n", x, y.x);
    fflush(stderr);
  }
#endif

  r.x = x + y.x;
  r.y = x - r.x + y.x + y.y;

  return r;
}

// Sleef_float2 + float without an ordering check: this variant contains no
// |x| >= |y| debug assertion, unlike the dfadd_* functions.
static INLINE CONST Sleef_float2 dfadd2_f2_f2_f(Sleef_float2 x, float y) {
  // no |x| >= |y| check in this variant

  Sleef_float2 r;

  r.x = x.x + y;
  float v = r.x - x.x;
  r.y = (x.x - (r.x - v)) + (y - v);
  r.y += x.y;

  return r;
}

// float + Sleef_float2 without an ordering check.
static INLINE CONST Sleef_float2 dfadd2_f2_f_f2(float x, Sleef_float2 y) {
  Sleef_float2 r;

  r.x = x + y.x;
  float v = r.x - x;
  r.y = (x - (r.x - v)) + (y.x - v) + y.y;

  return r;
}

// Sleef_float2 + Sleef_float2. Debug builds print a diagnostic when neither
// high word is inf/NaN and |x.x| < |y.x|.
static INLINE CONST Sleef_float2 dfadd_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) {
  // |x| >= |y|

  Sleef_float2 r;

#ifndef NDEBUG
  if (!(checkfp(x.x) || checkfp(y.x) || fabsfk(x.x) >= fabsfk(y.x))) fprintf(stderr, "[dfadd_f2_f2_f2 : %g %g]", x.x, y.x);
#endif

  r.x = x.x + y.x;
  r.y = x.x - r.x + y.x + x.y + y.y;

  return r;
}

// Sleef_float2 + Sleef_float2 without an ordering check.
static INLINE CONST Sleef_float2 dfadd2_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) {
  Sleef_float2 r;

  r.x = x.x + y.x;
  float v = r.x - x.x;
  r.y = (x.x - (r.x - v)) + (y.x - v);
  r.y += x.y + y.y;

  return r;
}

static INLINE CONST Sleef_float2
dfsub_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) {
  // |x| >= |y|
  // Sleef_float2 - Sleef_float2. Debug builds print a diagnostic when
  // neither high word is inf/NaN and |x.x| < |y.x|.

  Sleef_float2 r;

#ifndef NDEBUG
  if (!(checkfp(x.x) || checkfp(y.x) || fabsfk(x.x) >= fabsfk(y.x))) fprintf(stderr, "[dfsub_f2_f2_f2 : %g %g]", x.x, y.x);
#endif

  r.x = x.x - y.x;
  r.y = x.x - r.x - y.x + x.y - y.y;

  return r;
}

// Quotient n / d of two Sleef_float2 values. q.x is n.x times the float
// reciprocal t of d.x; q.y is a correction assembled from the upperf-split
// halves of d.x, t and n.x plus the low words of n and d.
static INLINE CONST Sleef_float2 dfdiv_f2_f2_f2(Sleef_float2 n, Sleef_float2 d) {
  float t = 1.0f / d.x;
  float dh = upperf(d.x), dl = d.x - dh;
  float th = upperf(t ), tl = t - th;
  float nhh = upperf(n.x), nhl = n.x - nhh;

  Sleef_float2 q;

  q.x = n.x * t;

  // u = (split product n.x * t) - q.x  +  q.x * (1 - split product d.x * t)
  float u = -q.x + nhh * th + nhh * tl + nhl * th + nhl * tl +
    q.x * (1 - dh * th - dh * tl - dl * th - dl * tl);

  q.y = t * (n.y - q.x * d.y) + u;

  return q;
}

// Product of two floats as a Sleef_float2: r.x is the rounded product and
// r.y is the sum of the partial products of the upperf-split halves minus r.x.
static INLINE CONST Sleef_float2 dfmul_f2_f_f(float x, float y) {
  float xh = upperf(x), xl = x - xh;
  float yh = upperf(y), yl = y - yh;
  Sleef_float2 r;

  r.x = x * y;
  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl;

  return r;
}

// Sleef_float2 * float: as dfmul_f2_f_f on x.x and y, with x.y * y added to
// the low word.
static INLINE CONST Sleef_float2 dfmul_f2_f2_f(Sleef_float2 x, float y) {
  float xh = upperf(x.x), xl = x.x - xh;
  float yh = upperf(y ), yl = y - yh;
  Sleef_float2 r;

  r.x = x.x * y;
  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.y * y;

  return r;
}

// Sleef_float2 * Sleef_float2: as dfmul_f2_f_f on the high words, with the
// cross terms x.x * y.y and x.y * y.x added to the low word (x.y * y.y is
// not included).
static INLINE CONST Sleef_float2 dfmul_f2_f2_f2(Sleef_float2 x, Sleef_float2 y) {
  float xh = upperf(x.x), xl = x.x - xh;
  float yh = upperf(y.x), yl = y.x - yh;
  Sleef_float2 r;

  r.x = x.x * y.x;
  r.y = xh * yh - r.x + xl * yh + xh * yl + xl * yl + x.x * y.y + x.y * y.x;

  return r;
}

// Sleef_float2 * Sleef_float2 collapsed to a single float: the partial
// products are summed in the order written below, xh * yh last.
static INLINE CONST float dfmul_f_f2_f2(Sleef_float2 x, Sleef_float2 y) {
  float xh = upperf(x.x), xl = x.x - xh;
  float yh = upperf(y.x), yl = y.x - yh;

  return x.y * yh + xh * y.y + xl * yl + xh * yl + xl * yh + xh * yh;
}

// Square of a Sleef_float2: r.x is x.x * x.x, r.y collects the split partial
// products minus r.x plus the cross term x.x * (2 * x.y).
static INLINE CONST Sleef_float2 dfsqu_f2_f2(Sleef_float2 x) {
  float xh = upperf(x.x), xl = x.x - xh;
  Sleef_float2 r;

  r.x = x.x * x.x;
  r.y = xh * xh - r.x + (xh + xh) * xl + xl * xl + x.x * (x.y + x.y);

  return r;
}

static INLINE CONST float dfsqu_f_f2(Sleef_float2 x) {
  float xh = upperf(x.x), xl = x.x - xh;

  return xh *
x.y + xh * x.y + xl * xl + (xh * xl + xh * xl) + xh * xh; } static INLINE CONST Sleef_float2 dfrec_f2_f(float d) { float t = 1.0f / d; float dh = upperf(d), dl = d - dh; float th = upperf(t), tl = t - th; Sleef_float2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl); return q; } static INLINE CONST Sleef_float2 dfrec_f2_f2(Sleef_float2 d) { float t = 1.0f / d.x; float dh = upperf(d.x), dl = d.x - dh; float th = upperf(t ), tl = t - th; Sleef_float2 q; q.x = t; q.y = t * (1 - dh * th - dh * tl - dl * th - dl * tl - d.y * t); return q; } static INLINE CONST Sleef_float2 dfsqrt_f2_f2(Sleef_float2 d) { float t = SQRTF(d.x + d.y); return dfscale_f2_f2_f(dfmul_f2_f2_f2(dfadd2_f2_f2_f2(d, dfmul_f2_f_f(t, t)), dfrec_f2_f(t)), 0.5f); } static INLINE CONST Sleef_float2 dfsqrt_f2_f(float d) { float t = SQRTF(d); return dfscale_f2_f2_f(dfmul_f2_f2_f2(dfadd2_f2_f_f2(d, dfmul_f2_f_f(t, t)), dfrec_f2_f(t)), 0.5); } // typedef struct { float d; int32_t i; } fi_t; typedef struct { Sleef_float2 df; int32_t i; } dfi_t; static CONST fi_t rempisubf(float x) { fi_t ret; float fr = x - (float)(1LL << 10) * (int32_t)(x * (1.0f / (1LL << 10))); ret.i = ((7 & ((x > 0 ? 4 : 3) + (int32_t)(fr * 8))) - 3) >> 1; fr = fr - 0.25f * (int32_t)(fr * 4 + mulsignf(0.5f, x)); fr = fabsfk(fr) > 0.125f ? (fr - mulsignf(0.5f, x)) : fr; fr = fabsfk(fr) > 1e+10f ? 0 : fr; if (fabsfk(x) == 0.12499999254941940308f) { fr = x; ret.i = 0; } ret.d = fr; return ret; } static CONST dfi_t rempif(float a) { Sleef_float2 x, y, z; fi_t di; float t; int ex = ilogb2kf(a) - 25, q = ex > (90 - 25) ? 
-64 : 0; a = ldexp3kf(a, q); if (ex < 0) ex = 0; ex *= 4; x = dfmul_f2_f_f(a, rempitabsp[ex]); di = rempisubf(x.x); q = di.i; x.x = di.d; x = dfnormalize_f2_f2(x); y = dfmul_f2_f_f(a, rempitabsp[ex+1]); x = dfadd2_f2_f2_f2(x, y); di = rempisubf(x.x); q += di.i; x.x = di.d; x = dfnormalize_f2_f2(x); y = dfmul_f2_f2_f(df(rempitabsp[ex+2], rempitabsp[ex+3]), a); x = dfadd2_f2_f2_f2(x, y); x = dfnormalize_f2_f2(x); x = dfmul_f2_f2_f2(x, df(3.1415927410125732422f*2, -8.7422776573475857731e-08f*2)); dfi_t ret = { fabsfk(a) < 0.7f ? df(a, 0) : x, q }; return ret; } EXPORT CONST float xsinf(float d) { int q; float u, s, t = d; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * (float)M_1_PI); d = mlaf(q, -PI_A2f, d); d = mlaf(q, -PI_B2f, d); d = mlaf(q, -PI_C2f, d); } else if (fabsfk(d) < TRIGRANGEMAXf) { q = (int)rintfk(d * (float)M_1_PI); d = mlaf(q, -PI_Af, d); d = mlaf(q, -PI_Bf, d); d = mlaf(q, -PI_Cf, d); d = mlaf(q, -PI_Df, d); } else { dfi_t dfi = rempif(t); q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 1) >> 2; if ((dfi.i & 1) != 0) { dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x), mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x))); } d = dfi.df.x + dfi.df.y; if (xisinff(t) || xisnanf(t)) d = SLEEF_NANf; } s = d * d; if ((q & 1) != 0) d = -d; u = 2.6083159809786593541503e-06f; u = mlaf(u, s, -0.0001981069071916863322258f); u = mlaf(u, s, 0.00833307858556509017944336f); u = mlaf(u, s, -0.166666597127914428710938f); u = mlaf(s, u * d, d); if (xisnegzerof(t)) u = -0.0f; return u; } EXPORT CONST float xsinf_u1(float d) { int q; float u; Sleef_float2 s, t, x; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * (float)M_1_PI); u = mlaf(q, -PI_A2f, d); s = dfadd2_f2_f_f(u, q * (-PI_B2f)); s = dfadd_f2_f2_f(s, q * (-PI_C2f)); } else { dfi_t dfi = rempif(d); q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 1) >> 2; if ((dfi.i & 1) != 0) { dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x), 
mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x))); } s = dfnormalize_f2_f2(dfi.df); if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; } t = s; s = dfsqu_f2_f2(s); u = 2.6083159809786593541503e-06f; u = mlaf(u, s.x, -0.0001981069071916863322258f); u = mlaf(u, s.x, 0.00833307858556509017944336f); x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f(-0.166666597127914428710938f, u * s.x), s)); u = dfmul_f_f2_f2(t, x); if ((q & 1) != 0) u = -u; if (xisnegzerof(d)) u = d; return u; } EXPORT CONST float xcosf(float d) { int q; float u, s, t = d; if (fabsfk(d) < TRIGRANGEMAX2f) { q = 1 + 2*(int)rintfk(d * (float)M_1_PI - 0.5f); d = mlaf(q, -PI_A2f*0.5f, d); d = mlaf(q, -PI_B2f*0.5f, d); d = mlaf(q, -PI_C2f*0.5f, d); } else if (fabsfk(d) < TRIGRANGEMAXf) { q = 1 + 2*(int)rintfk(d * (float)M_1_PI - 0.5f); d = mlaf(q, -PI_Af*0.5f, d); d = mlaf(q, -PI_Bf*0.5f, d); d = mlaf(q, -PI_Cf*0.5f, d); d = mlaf(q, -PI_Df*0.5f, d); } else { dfi_t dfi = rempif(t); q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 7) >> 1; if ((dfi.i & 1) == 0) { dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x > 0 ? 1 : -1), mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x > 0 ? 
1 : -1))); } d = dfi.df.x + dfi.df.y; if (xisinff(t) || xisnanf(t)) d = SLEEF_NANf; } s = d * d; if ((q & 2) == 0) d = -d; u = 2.6083159809786593541503e-06f; u = mlaf(u, s, -0.0001981069071916863322258f); u = mlaf(u, s, 0.00833307858556509017944336f); u = mlaf(u, s, -0.166666597127914428710938f); u = mlaf(s, u * d, d); return u; } EXPORT CONST float xcosf_u1(float d) { float u; Sleef_float2 s, t, x; int q; if (fabsfk(d) < TRIGRANGEMAX2f) { d = fabsfk(d); float dq = mlaf(rintfk(d * (float)M_1_PI - 0.5f), 2, 1); q = (int)dq; s = dfadd2_f2_f_f (d, dq * (-PI_A2f*0.5f)); s = dfadd2_f2_f2_f(s, dq * (-PI_B2f*0.5f)); s = dfadd2_f2_f2_f(s, dq * (-PI_C2f*0.5f)); } else { dfi_t dfi = rempif(d); q = ((dfi.i & 3) * 2 + (dfi.df.x > 0) + 7) >> 1; if ((dfi.i & 1) == 0) { dfi.df = dfadd2_f2_f2_f2(dfi.df, df(mulsignf(3.1415927410125732422f*-0.5, dfi.df.x > 0 ? 1 : -1), mulsignf(-8.7422776573475857731e-08f*-0.5, dfi.df.x > 0 ? 1 : -1))); } s = dfnormalize_f2_f2(dfi.df); if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; } t = s; s = dfsqu_f2_f2(s); u = 2.6083159809786593541503e-06f; u = mlaf(u, s.x, -0.0001981069071916863322258f); u = mlaf(u, s.x, 0.00833307858556509017944336f); x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f(-0.166666597127914428710938f, u * s.x), s)); u = dfmul_f_f2_f2(t, x); if ((((int)q) & 2) == 0) u = -u; return u; } EXPORT CONST Sleef_float2 xsincosf(float d) { int q; float u, s, t; Sleef_float2 r; s = d; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * ((float)(2 * M_1_PI))); s = mlaf(q, -PI_A2f*0.5f, s); s = mlaf(q, -PI_B2f*0.5f, s); s = mlaf(q, -PI_C2f*0.5f, s); } else if (fabsfk(d) < TRIGRANGEMAXf) { q = (int)rintfk(d * ((float)(2 * M_1_PI))); s = mlaf(q, -PI_Af*0.5f, s); s = mlaf(q, -PI_Bf*0.5f, s); s = mlaf(q, -PI_Cf*0.5f, s); s = mlaf(q, -PI_Df*0.5f, s); } else { dfi_t dfi = rempif(d); q = dfi.i; s = dfi.df.x + dfi.df.y; if (xisinff(d) || xisnanf(d)) s = SLEEF_NANf; } t = s; s = s * s; u = -0.000195169282960705459117889f; u = mlaf(u, s, 
0.00833215750753879547119141f); u = mlaf(u, s, -0.166666537523269653320312f); u = u * s * t; r.x = t + u; if (xisnegzerof(d)) r.x = -0.0f; u = -2.71811842367242206819355e-07f; u = mlaf(u, s, 2.47990446951007470488548e-05f); u = mlaf(u, s, -0.00138888787478208541870117f); u = mlaf(u, s, 0.0416666641831398010253906f); u = mlaf(u, s, -0.5f); r.y = u * s + 1; if ((q & 1) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 2) != 0) { r.x = -r.x; } if (((q+1) & 2) != 0) { r.y = -r.y; } return r; } EXPORT CONST Sleef_float2 xsincosf_u1(float d) { int q; float u; Sleef_float2 r, s, t, x; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * (float)(2 * M_1_PI)); u = mlaf(q, -PI_A2f*0.5f, d); s = dfadd2_f2_f_f(u, q * (-PI_B2f*0.5f)); s = dfadd_f2_f2_f(s, q * (-PI_C2f*0.5f)); } else { dfi_t dfi = rempif(d); q = dfi.i; s = dfi.df; if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; } t = s; s.x = dfsqu_f_f2(s); u = -0.000195169282960705459117889f; u = mlaf(u, s.x, 0.00833215750753879547119141f); u = mlaf(u, s.x, -0.166666537523269653320312f); u *= s.x * t.x; x = dfadd_f2_f2_f(t, u); r.x = x.x + x.y; if (xisnegzerof(d)) r.x = -0.0f; u = -2.71811842367242206819355e-07f; u = mlaf(u, s.x, 2.47990446951007470488548e-05f); u = mlaf(u, s.x, -0.00138888787478208541870117f); u = mlaf(u, s.x, 0.0416666641831398010253906f); u = mlaf(u, s.x, -0.5f); x = dfadd_f2_f_f2(1, dfmul_f2_f_f(s.x, u)); r.y = x.x + x.y; if ((q & 1) != 0) { u = r.y; r.y = r.x; r.x = u; } if ((q & 2) != 0) { r.x = -r.x; } if (((q+1) & 2) != 0) { r.y = -r.y; } return r; } EXPORT CONST Sleef_float2 xsincospif_u05(float d) { float u, s, t; Sleef_float2 r, x, s2; u = d * 4; int q = ceilfk(u) & ~(int)1; s = u - (float)q; t = s; s = s * s; s2 = dfmul_f2_f_f(t, t); // u = +0.3093842054e-6; u = mlaf(u, s, -0.3657307388e-4); u = mlaf(u, s, +0.2490393585e-2); x = dfadd2_f2_f_f2(u * s, df(-0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), df(0.78539818525314331055, 
-2.1857338617566484855e-08)); x = dfmul_f2_f2_f(x, t); r.x = x.x + x.y; if (xisnegzerof(d)) r.x = -0.0f; u = -0.2430611801e-7; u = mlaf(u, s, +0.3590577080e-5); u = mlaf(u, s, -0.3259917721e-3); x = dfadd2_f2_f_f2(u * s, df(0.015854343771934509277, 4.4940051354032242811e-10)); x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), df(-0.30842512845993041992, -9.0728339030733922277e-09)); x = dfadd2_f2_f2_f(dfmul_f2_f2_f2(x, s2), 1); r.y = x.x + x.y; if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (fabsfk(d) > 1e+7f) { r.x = 0; r.y = 1; } if (xisinff(d)) { r.x = r.y = SLEEF_NANf; } return r; } EXPORT CONST Sleef_float2 xsincospif_u35(float d) { float u, s, t; Sleef_float2 r; u = d * 4; int q = ceilfk(u) & ~(int)1; s = u - (float)q; t = s; s = s * s; // u = -0.3600925265e-4; u = mlaf(u, s, +0.2490088111e-2); u = mlaf(u, s, -0.8074551076e-1); u = mlaf(u, s, +0.7853981853e+0); r.x = u * t; u = +0.3539815225e-5; u = mlaf(u, s, -0.3259574005e-3); u = mlaf(u, s, +0.1585431583e-1); u = mlaf(u, s, -0.3084251285e+0); u = mlaf(u, s, 1); r.y = u; if ((q & 2) != 0) { s = r.y; r.y = r.x; r.x = s; } if ((q & 4) != 0) { r.x = -r.x; } if (((q+2) & 4) != 0) { r.y = -r.y; } if (fabsfk(d) > 1e+7f) { r.x = 0; r.y = 1; } if (xisinff(d)) { r.x = r.y = SLEEF_NANf; } return r; } EXPORT CONST float xtanf(float d) { int q; float u, s, x; x = d; if (fabsfk(d) < TRIGRANGEMAX2f*0.5f) { q = (int)rintfk(d * (float)(2 * M_1_PI)); x = mlaf(q, -PI_A2f*0.5f, x); x = mlaf(q, -PI_B2f*0.5f, x); x = mlaf(q, -PI_C2f*0.5f, x); } else if (fabsfk(d) < TRIGRANGEMAXf) { q = (int)rintfk(d * (float)(2 * M_1_PI)); x = mlaf(q, -PI_Af*0.5f, x); x = mlaf(q, -PI_Bf*0.5f, x); x = mlaf(q, -PI_Cf*0.5f, x); x = mlaf(q, -PI_Df*0.5f, x); } else { dfi_t dfi = rempif(d); q = dfi.i; x = dfi.df.x + dfi.df.y; if (xisinff(d) || xisnanf(d)) x = SLEEF_NANf; } s = x * x; if ((q & 1) != 0) x = -x; u = 0.00927245803177356719970703f; u = mlaf(u, s, 
0.00331984995864331722259521f); u = mlaf(u, s, 0.0242998078465461730957031f); u = mlaf(u, s, 0.0534495301544666290283203f); u = mlaf(u, s, 0.133383005857467651367188f); u = mlaf(u, s, 0.333331853151321411132812f); u = mlaf(s, u * x, x); if ((q & 1) != 0) u = 1.0f / u; return u; } EXPORT CONST float xtanf_u1(float d) { int q; float u; Sleef_float2 s, t, x; if (fabsfk(d) < TRIGRANGEMAX2f) { q = (int)rintfk(d * (float)(2 * M_1_PI)); u = mlaf(q, -PI_A2f*0.5f, d); s = dfadd2_f2_f_f(u, q * (-PI_B2f*0.5f)); s = dfadd_f2_f2_f(s, q * (-PI_C2f*0.5f)); } else { dfi_t dfi = rempif(d); q = dfi.i; s = dfi.df; if (xisinff(d) || xisnanf(d)) s.x = SLEEF_NANf; } if ((q & 1) != 0) s = dfneg_f2_f2(s); t = s; s = dfsqu_f2_f2(s); s = dfnormalize_f2_f2(s); u = 0.00446636462584137916564941f; u = mlaf(u, s.x, -8.3920182078145444393158e-05f); u = mlaf(u, s.x, 0.0109639242291450500488281f); u = mlaf(u, s.x, 0.0212360303848981857299805f); u = mlaf(u, s.x, 0.0540687143802642822265625f); x = dfadd_f2_f_f(0.133325666189193725585938f, u * s.x); x = dfadd_f2_f_f2(1, dfmul_f2_f2_f2(dfadd_f2_f_f2(0.33333361148834228515625f, dfmul_f2_f2_f2(s, x)), s)); x = dfmul_f2_f2_f2(t, x); if ((q & 1) != 0) x = dfrec_f2_f2(x); u = x.x + x.y; if (xisnegzerof(d)) u = -0.0f; return u; } EXPORT CONST float xatanf(float s) { float t, u; int q = 0; if (signf(s) == -1) { s = -s; q = 2; } if (s > 1) { s = 1.0f / s; q |= 1; } t = s * s; u = 0.00282363896258175373077393f; u = mlaf(u, t, -0.0159569028764963150024414f); u = mlaf(u, t, 0.0425049886107444763183594f); u = mlaf(u, t, -0.0748900920152664184570312f); u = mlaf(u, t, 0.106347933411598205566406f); u = mlaf(u, t, -0.142027363181114196777344f); u = mlaf(u, t, 0.199926957488059997558594f); u = mlaf(u, t, -0.333331018686294555664062f); t = s + s * (t * u); if ((q & 1) != 0) t = 1.570796326794896557998982f - t; if ((q & 2) != 0) t = -t; return t; } static INLINE CONST float atan2kf(float y, float x) { float s, t, u; int q = 0; if (x < 0) { x = -x; q = -2; } if (y > x) { 
t = x; x = y; y = -t; q += 1; } s = y / x; t = s * s; u = 0.00282363896258175373077393f; u = mlaf(u, t, -0.0159569028764963150024414f); u = mlaf(u, t, 0.0425049886107444763183594f); u = mlaf(u, t, -0.0748900920152664184570312f); u = mlaf(u, t, 0.106347933411598205566406f); u = mlaf(u, t, -0.142027363181114196777344f); u = mlaf(u, t, 0.199926957488059997558594f); u = mlaf(u, t, -0.333331018686294555664062f); t = u * t * s + s; t = q * (float)(M_PI/2) + t; return t; } EXPORT CONST float xatan2f(float y, float x) { float r = atan2kf(fabsfk(y), x); r = mulsignf(r, x); if (xisinff(x) || x == 0) r = M_PIf/2 - (xisinff(x) ? (signf(x) * (float)(M_PI /2)) : 0); if (xisinff(y) ) r = M_PIf/2 - (xisinff(x) ? (signf(x) * (float)(M_PI*1/4)) : 0); if ( y == 0) r = (signf(x) == -1 ? M_PIf : 0); return xisnanf(x) || xisnanf(y) ? SLEEF_NANf : mulsignf(r, y); } EXPORT CONST float xasinf(float d) { int o = fabsfk(d) < 0.5f; float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), x = o ? fabsfk(d) : SQRTF(x2), u; u = +0.4197454825e-1; u = mlaf(u, x2, +0.2424046025e-1); u = mlaf(u, x2, +0.4547423869e-1); u = mlaf(u, x2, +0.7495029271e-1); u = mlaf(u, x2, +0.1666677296e+0); u = mlaf(u, x * x2, x); float r = o ? u : (M_PIf/2 - 2*u); r = mulsignf(r, d); return r; } EXPORT CONST float xacosf(float d) { int o = fabsfk(d) < 0.5f; float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), u; float x = o ? fabsfk(d) : SQRTF(x2); x = fabsfk(d) == 1.0 ? 0 : x; u = +0.4197454825e-1; u = mlaf(u, x2, +0.2424046025e-1); u = mlaf(u, x2, +0.4547423869e-1); u = mlaf(u, x2, +0.7495029271e-1); u = mlaf(u, x2, +0.1666677296e+0); u *= x * x2; float y = 3.1415926535897932f/2 - (mulsignf(x, d) + mulsignf(u, d)); x += u; float r = o ? 
y : (x*2); if (!o && d < 0) r = dfadd_f2_f2_f(df(3.1415927410125732422f,-8.7422776573475857731e-08f), -r).x; return r; } static Sleef_float2 atan2kf_u1(Sleef_float2 y, Sleef_float2 x) { float u; Sleef_float2 s, t; int q = 0; if (x.x < 0) { x.x = -x.x; x.y = -x.y; q = -2; } if (y.x > x.x) { t = x; x = y; y.x = -t.x; y.y = -t.y; q += 1; } s = dfdiv_f2_f2_f2(y, x); t = dfsqu_f2_f2(s); t = dfnormalize_f2_f2(t); u = -0.00176397908944636583328247f; u = mlaf(u, t.x, 0.0107900900766253471374512f); u = mlaf(u, t.x, -0.0309564601629972457885742f); u = mlaf(u, t.x, 0.0577365085482597351074219f); u = mlaf(u, t.x, -0.0838950723409652709960938f); u = mlaf(u, t.x, 0.109463557600975036621094f); u = mlaf(u, t.x, -0.142626821994781494140625f); u = mlaf(u, t.x, 0.199983194470405578613281f); t = dfmul_f2_f2_f2(t, dfadd_f2_f_f(-0.333332866430282592773438f, u * t.x)); t = dfmul_f2_f2_f2(s, dfadd_f2_f_f2(1, t)); t = dfadd2_f2_f2_f2(dfmul_f2_f2_f(df(1.5707963705062866211f, -4.3711388286737928865e-08f), q), t); return t; } EXPORT CONST float xatan2f_u1(float y, float x) { if (fabsfk(x) < 2.9387372783541830947e-39f) { y *= (1ULL << 24); x *= (1ULL << 24); } // nexttowardf((1.0 / FLT_MAX), 1) Sleef_float2 d = atan2kf_u1(df(fabsfk(y), 0), df(x, 0)); float r = d.x + d.y; r = mulsignf(r, x); if (xisinff(x) || x == 0) r = (float)M_PI/2 - (xisinff(x) ? (signf(x) * (float)(M_PI /2)) : 0.0f); if (xisinff(y) ) r = (float)M_PI/2 - (xisinff(x) ? (signf(x) * (float)(M_PI*1/4)) : 0.0f); if ( y == 0) r = (signf(x) == -1 ? (float)M_PI : 0.0f); return xisnanf(x) || xisnanf(y) ? SLEEF_NANf : mulsignf(r, y); } EXPORT CONST float xasinf_u1(float d) { int o = fabsfk(d) < 0.5f; float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), u; Sleef_float2 x = o ? df(fabsfk(d), 0) : dfsqrt_f2_f(x2); x = fabsfk(d) == 1.0f ? 
df(0, 0) : x; u = +0.4197454825e-1; u = mlaf(u, x2, +0.2424046025e-1); u = mlaf(u, x2, +0.4547423869e-1); u = mlaf(u, x2, +0.7495029271e-1); u = mlaf(u, x2, +0.1666677296e+0); u *= x2 * x.x; Sleef_float2 y = dfadd_f2_f2_f(dfsub_f2_f2_f2(df(3.1415927410125732422f/4,-8.7422776573475857731e-08f/4), x), -u); float r = o ? (u + x.x) : ((y.x + y.y)*2); r = mulsignf(r, d); return r; } EXPORT CONST float xacosf_u1(float d) { int o = fabsfk(d) < 0.5f; float x2 = o ? (d*d) : ((1-fabsfk(d))*0.5f), u; Sleef_float2 x = o ? df(fabsfk(d), 0) : dfsqrt_f2_f(x2); x = fabsfk(d) == 1.0 ? df(0, 0) : x; u = +0.4197454825e-1; u = mlaf(u, x2, +0.2424046025e-1); u = mlaf(u, x2, +0.4547423869e-1); u = mlaf(u, x2, +0.7495029271e-1); u = mlaf(u, x2, +0.1666677296e+0); u = u * x.x * x2; Sleef_float2 y = dfsub_f2_f2_f2(df(3.1415927410125732422f/2,-8.7422776573475857731e-08f/2), dfadd_f2_f_f(mulsignf(x.x, d), mulsignf(u, d))); x = dfadd_f2_f2_f(x, u); y = o ? y : dfscale_f2_f2_f(x, 2); if (!o && d < 0) y = dfsub_f2_f2_f2(df(3.1415927410125732422f,-8.7422776573475857731e-08f), y); return y.x + y.y; } EXPORT CONST float xatanf_u1(float d) { Sleef_float2 d2 = atan2kf_u1(df(fabsfk(d), 0.0f), df(1.0f, 0.0f)); float r = d2.x + d2.y; if (xisinff(d)) r = 1.570796326794896557998982f; return mulsignf(r, d); } EXPORT CONST float xlogf(float d) { float x, x2, t, m; int e; int o = d < FLT_MIN; if (o) d *= (float)(1LL << 32) * (float)(1LL << 32); e = ilogb2kf(d * (1.0f/0.75f)); m = ldexp3kf(d, -e); if (o) e -= 64; x = (m-1.0f) / (m+1.0f); x2 = x * x; t = 0.2392828464508056640625f; t = mlaf(t, x2, 0.28518211841583251953125f); t = mlaf(t, x2, 0.400005877017974853515625f); t = mlaf(t, x2, 0.666666686534881591796875f); t = mlaf(t, x2, 2.0f); x = x * t + 0.693147180559945286226764f * e; if (xisinff(d)) x = SLEEF_INFINITYf; if (d < 0 || xisnanf(d)) x = SLEEF_NANf; if (d == 0) x = -SLEEF_INFINITYf; return x; } EXPORT CONST float xexpf(float d) { int q = (int)rintfk(d * R_LN2f); float s, u; s = mlaf(q, -L2Uf, d); s = 
mlaf(q, -L2Lf, s); u = 0.000198527617612853646278381; u = mlaf(u, s, 0.00139304355252534151077271); u = mlaf(u, s, 0.00833336077630519866943359); u = mlaf(u, s, 0.0416664853692054748535156); u = mlaf(u, s, 0.166666671633720397949219); u = mlaf(u, s, 0.5); u = s * s * u + s + 1.0f; u = ldexp2kf(u, q); if (d < -104) u = 0; if (d > 104) u = SLEEF_INFINITYf; return u; } static INLINE CONST float expkf(Sleef_float2 d) { int q = (int)rintfk((d.x + d.y) * R_LN2f); Sleef_float2 s, t; float u; s = dfadd2_f2_f2_f(d, q * -L2Uf); s = dfadd2_f2_f2_f(s, q * -L2Lf); s = dfnormalize_f2_f2(s); u = 0.00136324646882712841033936f; u = mlaf(u, s.x, 0.00836596917361021041870117f); u = mlaf(u, s.x, 0.0416710823774337768554688f); u = mlaf(u, s.x, 0.166665524244308471679688f); u = mlaf(u, s.x, 0.499999850988388061523438f); t = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfsqu_f2_f2(s), u)); t = dfadd_f2_f_f2(1, t); u = ldexpkf(t.x + t.y, q); if (d.x < -104) u = 0; return u; } static INLINE CONST float expm1kf(float d) { int q = (int)rintfk(d * R_LN2f); float s, u; s = mlaf(q, -L2Uf, d); s = mlaf(q, -L2Lf, s); u = 0.000198527617612853646278381; u = mlaf(u, s, 0.00139304355252534151077271); u = mlaf(u, s, 0.00833336077630519866943359); u = mlaf(u, s, 0.0416664853692054748535156); u = mlaf(u, s, 0.166666671633720397949219); u = mlaf(u, s, 0.5); u = s * s * u + s; if (q != 0) u = ldexp2kf(u + 1, q) - 1; return u; } static INLINE CONST Sleef_float2 logkf(float d) { Sleef_float2 x, x2, s; float m, t; int e; int o = d < FLT_MIN; if (o) d *= (float)(1LL << 32) * (float)(1LL << 32); e = ilogb2kf(d * (1.0f/0.75f)); m = ldexp3kf(d, -e); if (o) e -= 64; x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); x2 = dfsqu_f2_f2(x); t = 0.240320354700088500976562; t = mlaf(t, x2.x, 0.285112679004669189453125); t = mlaf(t, x2.x, 0.400007992982864379882812); Sleef_float2 c = df(0.66666662693023681640625f, 3.69183861259614332084311e-09f); s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), 
e); s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); s = dfadd_f2_f2_f2(s, dfmul_f2_f2_f2(dfmul_f2_f2_f2(x2, x), dfadd2_f2_f2_f2(dfmul_f2_f2_f(x2, t), c))); return s; } EXPORT CONST float xlogf_u1(float d) { Sleef_float2 x, s; float m, t, x2; int e; int o = d < FLT_MIN; if (o) d *= (float)(1LL << 32) * (float)(1LL << 32); e = ilogb2kf(d * (1.0f/0.75f)); m = ldexp3kf(d, -e); if (o) e -= 64; x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); x2 = x.x * x.x; t = +0.3027294874e+0f; t = mlaf(t, x2, +0.3996108174e+0f); t = mlaf(t, x2, +0.6666694880e+0f); s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), (float)e); s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); s = dfadd_f2_f2_f(s, x2 * x.x * t); float r = s.x + s.y; if (xisinff(d)) r = SLEEF_INFINITYf; if (d < 0 || xisnanf(d)) r = SLEEF_NANf; if (d == 0) r = -SLEEF_INFINITYf; return r; } static INLINE CONST Sleef_float2 expk2f(Sleef_float2 d) { int q = (int)rintfk((d.x + d.y) * R_LN2f); Sleef_float2 s, t; float u; s = dfadd2_f2_f2_f(d, q * -L2Uf); s = dfadd2_f2_f2_f(s, q * -L2Lf); u = +0.1980960224e-3f; u = mlaf(u, s.x, +0.1394256484e-2f); u = mlaf(u, s.x, +0.8333456703e-2f); u = mlaf(u, s.x, +0.4166637361e-1f); t = dfadd2_f2_f2_f(dfmul_f2_f2_f(s, u), +0.166666659414234244790680580464e+0f); t = dfadd2_f2_f2_f(dfmul_f2_f2_f2(s, t), 0.5); t = dfadd2_f2_f2_f2(s, dfmul_f2_f2_f2(dfsqu_f2_f2(s), t)); t = dfadd2_f2_f_f2(1, t); t.x = ldexp2kf(t.x, q); t.y = ldexp2kf(t.y, q); return d.x < -104 ? df(0, 0) : t; } EXPORT CONST float xpowf(float x, float y) { int yisint = (y == (int)y) || (fabsfk(y) >= (float)(1LL << 24)); int yisodd = (1 & (int)y) != 0 && yisint && fabsfk(y) < (float)(1LL << 24); float result = expkf(dfmul_f2_f2_f(logkf(fabsfk(x)), y)); result = xisnanf(result) ? SLEEF_INFINITYf : result; result *= (x >= 0 ? 1 : (!yisint ? SLEEF_NANf : (yisodd ? -1 : 1))); float efx = mulsignf(fabsfk(x) - 1, y); if (xisinff(y)) result = efx < 0 ? 0.0f : (efx == 0 ? 
1.0f : SLEEF_INFINITYf); if (xisinff(x) || x == 0) result = (yisodd ? signf(x) : 1) * ((x == 0 ? -y : y) < 0 ? 0 : SLEEF_INFINITYf); if (xisnanf(x) || xisnanf(y)) result = SLEEF_NANf; if (y == 0 || x == 1) result = 1; return result; } EXPORT CONST float xsinhf(float x) { float y = fabsfk(x); Sleef_float2 d = expk2f(df(y, 0)); d = dfsub_f2_f2_f2(d, dfrec_f2_f2(d)); y = (d.x + d.y) * 0.5f; y = fabsfk(x) > 89 ? SLEEF_INFINITYf : y; y = xisnanf(y) ? SLEEF_INFINITYf : y; y = mulsignf(y, x); y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xcoshf(float x) { float y = fabsfk(x); Sleef_float2 d = expk2f(df(y, 0)); d = dfadd_f2_f2_f2(d, dfrec_f2_f2(d)); y = (d.x + d.y) * 0.5f; y = fabsfk(x) > 89 ? SLEEF_INFINITYf : y; y = xisnanf(y) ? SLEEF_INFINITYf : y; y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xtanhf(float x) { float y = fabsfk(x); Sleef_float2 d = expk2f(df(y, 0)); Sleef_float2 e = dfrec_f2_f2(d); d = dfdiv_f2_f2_f2(dfsub_f2_f2_f2(d, e), dfadd_f2_f2_f2(d, e)); y = d.x + d.y; y = fabsfk(x) > 18.714973875f ? 1.0f : y; y = xisnanf(y) ? 1.0f : y; y = mulsignf(y, x); y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xsinhf_u35(float x) { float e = expm1kf(fabsfk(x)); float y = (e + 2) / (e + 1) * (0.5f * e); y = fabsfk(x) > 88 ? SLEEF_INFINITYf : y; y = xisnanf(y) ? SLEEF_INFINITYf : y; y = mulsignf(y, x); y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xcoshf_u35(float x) { float e = xexpf(fabsfk(x)); float y = 0.5f * e + 0.5f / e; y = fabsfk(x) > 88 ? SLEEF_INFINITYf : y; y = xisnanf(y) ? SLEEF_INFINITYf : y; y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xtanhf_u35(float x) { float y = fabsfk(x); float d = expm1kf(2*y); y = d / (d + 2); y = fabsfk(x) > 18.714973875f ? 1.0f : y; y = xisnanf(y) ? 1.0f : y; y = mulsignf(y, x); y = xisnanf(x) ? 
SLEEF_NANf : y; return y; } static INLINE CONST Sleef_float2 logk2f(Sleef_float2 d) { Sleef_float2 x, x2, m, s; float t; int e; e = ilogbkf(d.x * (1.0f/0.75f)); m = dfscale_f2_f2_f(d, pow2if(-e)); x = dfdiv_f2_f2_f2(dfadd2_f2_f2_f(m, -1), dfadd2_f2_f2_f(m, 1)); x2 = dfsqu_f2_f2(x); t = 0.2392828464508056640625f; t = mlaf(t, x2.x, 0.28518211841583251953125f); t = mlaf(t, x2.x, 0.400005877017974853515625f); t = mlaf(t, x2.x, 0.666666686534881591796875f); s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), e); s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); s = dfadd_f2_f2_f2(s, dfmul_f2_f2_f(dfmul_f2_f2_f2(x2, x), t)); return s; } EXPORT CONST float xasinhf(float x) { float y = fabsfk(x); Sleef_float2 d; d = y > 1 ? dfrec_f2_f(x) : df(y, 0); d = dfsqrt_f2_f2(dfadd2_f2_f2_f(dfsqu_f2_f2(d), 1)); d = y > 1 ? dfmul_f2_f2_f(d, y) : d; d = logk2f(dfnormalize_f2_f2(dfadd_f2_f2_f(d, x))); y = d.x + d.y; y = (fabsfk(x) > SQRT_FLT_MAX || xisnanf(y)) ? mulsignf(SLEEF_INFINITYf, x) : y; y = xisnanf(x) ? SLEEF_NANf : y; y = xisnegzerof(x) ? -0.0f : y; return y; } EXPORT CONST float xacoshf(float x) { Sleef_float2 d = logk2f(dfadd2_f2_f2_f(dfmul_f2_f2_f2(dfsqrt_f2_f2(dfadd2_f2_f_f(x, 1)), dfsqrt_f2_f2(dfadd2_f2_f_f(x, -1))), x)); float y = d.x + d.y; y = (x > SQRT_FLT_MAX || xisnanf(y)) ? SLEEF_INFINITYf : y; y = x == 1.0f ? 0.0f : y; y = x < 1.0f ? SLEEF_NANf : y; y = xisnanf(x) ? SLEEF_NANf : y; return y; } EXPORT CONST float xatanhf(float x) { float y = fabsfk(x); Sleef_float2 d = logk2f(dfdiv_f2_f2_f2(dfadd2_f2_f_f(1, y), dfadd2_f2_f_f(1, -y))); y = y > 1.0f ? SLEEF_NANf : (y == 1.0f ? SLEEF_INFINITYf : (d.x + d.y) * 0.5f); y = xisinff(x) || xisnanf(y) ? SLEEF_NANf : y; y = mulsignf(y, x); y = xisnanf(x) ? 
SLEEF_NANf : y; return y; } EXPORT CONST float xexp2f(float d) { int q = (int)rintfk(d); float s, u; s = d - q; u = +0.1535920892e-3; u = mlaf(u, s, +0.1339262701e-2); u = mlaf(u, s, +0.9618384764e-2); u = mlaf(u, s, +0.5550347269e-1); u = mlaf(u, s, +0.2402264476e+0); u = mlaf(u, s, +0.6931471825e+0); u = dfnormalize_f2_f2(dfadd_f2_f_f2(1, dfmul_f2_f_f(u, s))).x; u = ldexp2kf(u, q); if (d >= 128) u = SLEEF_INFINITYf; if (d < -150) u = 0; return u; } EXPORT CONST float xexp10f(float d) { int q = (int)rintfk(d * (float)LOG10_2); float s, u; s = mlaf(q, -L10Uf, d); s = mlaf(q, -L10Lf, s); u = +0.2064004987e+0; u = mlaf(u, s, +0.5417877436e+0); u = mlaf(u, s, +0.1171286821e+1); u = mlaf(u, s, +0.2034656048e+1); u = mlaf(u, s, +0.2650948763e+1); u = mlaf(u, s, +0.2302585125e+1); u = dfnormalize_f2_f2(dfadd_f2_f_f2(1, dfmul_f2_f_f(u, s))).x; u = ldexp2kf(u, q); if (d > 38.5318394191036238941387f) u = SLEEF_INFINITYf; // log10(FLT_MAX) if (d < -50) u = 0; return u; } EXPORT CONST float xexpm1f(float a) { Sleef_float2 d = dfadd2_f2_f2_f(expk2f(df(a, 0)), -1.0f); float x = d.x + d.y; if (a > 88.72283172607421875f) x = SLEEF_INFINITYf; if (a < -16.635532333438687426013570f) x = -1; if (xisnegzerof(a)) x = -0.0f; return x; } EXPORT CONST float xlog10f(float d) { Sleef_float2 x, s; float m, t, x2; int e; int o = d < FLT_MIN; if (o) d *= (float)(1LL << 32) * (float)(1LL << 32); e = ilogb2kf(d * (1.0f/0.75f)); m = ldexp3kf(d, -e); if (o) e -= 64; x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); x2 = x.x * x.x; t = +0.1314289868e+0; t = mlaf(t, x2, +0.1735493541e+0); t = mlaf(t, x2, +0.2895309627e+0); s = dfmul_f2_f2_f(df(0.30103001, -1.432098889e-08), (float)e); s = dfadd_f2_f2_f2(s, dfmul_f2_f2_f2(x, df(0.868588984, -2.170757285e-08))); s = dfadd_f2_f2_f(s, x2 * x.x * t); float r = s.x + s.y; if (xisinff(d)) r = SLEEF_INFINITYf; if (d < 0 || xisnanf(d)) r = SLEEF_NANf; if (d == 0) r = -SLEEF_INFINITYf; return r; } EXPORT CONST float xlog2f(float d) { Sleef_float2 
x, s; float m, t, x2; int e; int o = d < FLT_MIN; if (o) d *= (float)(1LL << 32) * (float)(1LL << 32); e = ilogb2kf(d * (1.0f/0.75f)); m = ldexp3kf(d, -e); if (o) e -= 64; x = dfdiv_f2_f2_f2(dfadd2_f2_f_f(-1, m), dfadd2_f2_f_f(1, m)); x2 = x.x * x.x; t = +0.4374550283e+0f; t = mlaf(t, x2, +0.5764790177e+0f); t = mlaf(t, x2, +0.9618012905120f); s = dfadd2_f2_f_f2(e, dfmul_f2_f2_f2(x, df(2.8853900432586669922, 3.2734474483568488616e-08))); s = dfadd2_f2_f2_f(s, x2 * x.x * t); float r = s.x + s.y; if (xisinff(d)) r = SLEEF_INFINITYf; if (d < 0 || xisnanf(d)) r = SLEEF_NANf; if (d == 0) r = -SLEEF_INFINITYf; return r; } EXPORT CONST float xlog1pf(float d) { Sleef_float2 x, s; float m, t, x2; int e; float dp1 = d + 1; int o = dp1 < FLT_MIN; if (o) dp1 *= (float)(1LL << 32) * (float)(1LL << 32); e = ilogb2kf(dp1 * (1.0f/0.75f)); t = ldexp3kf(1, -e); m = mlaf(d, t, t-1); if (o) e -= 64; x = dfdiv_f2_f2_f2(df(m, 0), dfadd_f2_f_f(2, m)); x2 = x.x * x.x; t = +0.3027294874e+0f; t = mlaf(t, x2, +0.3996108174e+0f); t = mlaf(t, x2, +0.6666694880e+0f); s = dfmul_f2_f2_f(df(0.69314718246459960938f, -1.904654323148236017e-09f), (float)e); s = dfadd_f2_f2_f2(s, dfscale_f2_f2_f(x, 2)); s = dfadd_f2_f2_f(s, x2 * x.x * t); float r = s.x + s.y; if (d > 1e+38) r = SLEEF_INFINITYf; if (d < -1) r = SLEEF_NANf; if (d == -1) r = -SLEEF_INFINITYf; if (xisnegzerof(d)) r = -0.0f; return r; } EXPORT CONST float xcbrtf(float d) { float x, y, q = 1.0f; int e, r; e = ilogbkf(fabsfk(d))+1; d = ldexp2kf(d, -e); r = (e + 6144) % 3; q = (r == 1) ? 1.2599210498948731647672106f : q; q = (r == 2) ? 
1.5874010519681994747517056f : q; q = ldexp2kf(q, (e + 6144) / 3 - 2048); q = mulsignf(q, d); d = fabsfk(d); x = -0.601564466953277587890625f; x = mlaf(x, d, 2.8208892345428466796875f); x = mlaf(x, d, -5.532182216644287109375f); x = mlaf(x, d, 5.898262500762939453125f); x = mlaf(x, d, -3.8095417022705078125f); x = mlaf(x, d, 2.2241256237030029296875f); y = d * x * x; y = (y - (2.0f / 3.0f) * y * (y * x - 1.0f)) * q; return y; } EXPORT CONST float xcbrtf_u1(float d) { float x, y, z; Sleef_float2 q2 = df(1, 0), u, v; int e, r; e = ilogbkf(fabsfk(d))+1; d = ldexp2kf(d, -e); r = (e + 6144) % 3; q2 = (r == 1) ? df(1.2599210739135742188, -2.4018701694217270415e-08) : q2; q2 = (r == 2) ? df(1.5874010324478149414, 1.9520385308169352356e-08) : q2; q2.x = mulsignf(q2.x, d); q2.y = mulsignf(q2.y, d); d = fabsfk(d); x = -0.601564466953277587890625f; x = mlaf(x, d, 2.8208892345428466796875f); x = mlaf(x, d, -5.532182216644287109375f); x = mlaf(x, d, 5.898262500762939453125f); x = mlaf(x, d, -3.8095417022705078125f); x = mlaf(x, d, 2.2241256237030029296875f); y = x * x; y = y * y; x -= (d * y - x) * (1.0 / 3.0f); z = x; u = dfmul_f2_f_f(x, x); u = dfmul_f2_f2_f2(u, u); u = dfmul_f2_f2_f(u, d); u = dfadd2_f2_f2_f(u, -x); y = u.x + u.y; y = -2.0 / 3.0 * y * z; v = dfadd2_f2_f2_f(dfmul_f2_f_f(z, z), y); v = dfmul_f2_f2_f(v, d); v = dfmul_f2_f2_f2(v, q2); z = ldexp2kf(v.x + v.y, (e + 6144) / 3 - 2048); if (xisinff(d)) { z = mulsignf(SLEEF_INFINITYf, q2.x); } if (d == 0) { z = mulsignf(0, q2.x); } return z; } // EXPORT CONST float xfabsf(float x) { return fabsfk(x); } EXPORT CONST float xcopysignf(float x, float y) { return copysignfk(x, y); } EXPORT CONST float xfmaxf(float x, float y) { return y != y ? x : (x > y ? x : y); } EXPORT CONST float xfminf(float x, float y) { return y != y ? x : (x < y ? 
x : y); } EXPORT CONST float xfdimf(float x, float y) { float ret = x - y; if (ret < 0 || x == y) ret = 0; return ret; } EXPORT CONST float xtruncf(float x) { float fr = x - (int32_t)x; return (xisinff(x) || fabsfk(x) >= (float)(1LL << 23)) ? x : copysignfk(x - fr, x); } EXPORT CONST float xfloorf(float x) { float fr = x - (int32_t)x; fr = fr < 0 ? fr+1.0f : fr; return (xisinff(x) || fabsfk(x) >= (float)(1LL << 23)) ? x : copysignfk(x - fr, x); } EXPORT CONST float xceilf(float x) { float fr = x - (int32_t)x; fr = fr <= 0 ? fr : fr-1.0f; return (xisinff(x) || fabsfk(x) >= (float)(1LL << 23)) ? x : copysignfk(x - fr, x); } EXPORT CONST float xroundf(float d) { float x = d + 0.5f; float fr = x - (int32_t)x; if (fr == 0 && x <= 0) x--; fr = fr < 0 ? fr+1.0f : fr; x = d == 0.4999999701976776123f ? 0 : x; // nextafterf(0.5, 0) return (xisinff(d) || fabsfk(d) >= (float)(1LL << 23)) ? d : copysignfk(x - fr, d); } EXPORT CONST float xrintf(float d) { float x = d + 0.5f; int32_t isodd = (1 & (int32_t)x) != 0; float fr = x - (int32_t)x; fr = (fr < 0 || (fr == 0 && isodd)) ? fr+1.0f : fr; x = d == 0.50000005960464477539f ? 0 : x; // nextafterf(0.5, 1) return (xisinff(d) || fabsfk(d) >= (float)(1LL << 23)) ? d : copysignfk(x - fr, d); } EXPORT CONST Sleef_float2 xmodff(float x) { float fr = x - (int32_t)x; fr = fabsfk(x) > (float)(1LL << 23) ? 0 : fr; Sleef_float2 ret = { copysignfk(fr, x), copysignfk(x - fr, x) }; return ret; } EXPORT CONST float xldexpf(float x, int exp) { if (exp > 300) exp = 300; if (exp < -300) exp = -300; int e0 = exp >> 2; if (exp < 0) e0++; if (-50 < exp && exp < 50) e0 = 0; int e1 = exp - (e0 << 2); float p = pow2if(e0); float ret = x * pow2if(e1) * p * p * p * p; return ret; } EXPORT CONST float xnextafterf(float x, float y) { union { float f; int32_t i; } cx; cx.f = x == 0 ? 
mulsignf(0, y) : x; int c = (cx.i < 0) == (y < x); if (c) cx.i = -(cx.i ^ (1 << 31)); if (x != y) cx.i--; if (c) cx.i = -(cx.i ^ (1 << 31)); if (cx.f == 0 && x != 0) cx.f = mulsignf(0, x); if (x == 0 && y == 0) cx.f = y; if (xisnanf(x) || xisnanf(y)) cx.f = SLEEF_NANf; return cx.f; } EXPORT CONST float xfrfrexpf(float x) { union { float f; int32_t u; } cx; if (fabsfk(x) < FLT_MIN) x *= (1 << 30); cx.f = x; cx.u &= ~0x7f800000U; cx.u |= 0x3f000000U; if (xisinff(x)) cx.f = mulsignf(SLEEF_INFINITYf, x); if (x == 0) cx.f = x; return cx.f; } EXPORT CONST int xexpfrexpf(float x) { union { float f; uint32_t u; } cx; int ret = 0; if (fabsfk(x) < FLT_MIN) { x *= (1 << 30); ret = -30; } cx.f = x; ret += (int32_t)(((cx.u >> 23) & 0xff)) - 0x7e; if (x == 0 || xisnanf(x) || xisinff(x)) ret = 0; return ret; } EXPORT CONST float xhypotf_u05(float x, float y) { x = fabsfk(x); y = fabsfk(y); float min = fminfk(x, y), n = min; float max = fmaxfk(x, y), d = max; if (max < FLT_MIN) { n *= 1ULL << 24; d *= 1ULL << 24; } Sleef_float2 t = dfdiv_f2_f2_f2(df(n, 0), df(d, 0)); t = dfmul_f2_f2_f(dfsqrt_f2_f2(dfadd2_f2_f2_f(dfsqu_f2_f2(t), 1)), max); float ret = t.x + t.y; if (xisnanf(ret)) ret = SLEEF_INFINITYf; if (min == 0) ret = max; if (xisnanf(x) || xisnanf(y)) ret = SLEEF_NANf; if (x == SLEEF_INFINITYf || y == SLEEF_INFINITYf) ret = SLEEF_INFINITYf; return ret; } EXPORT CONST float xhypotf_u35(float x, float y) { x = fabsfk(x); y = fabsfk(y); float min = fminfk(x, y); float max = fmaxfk(x, y); float t = min / max; float ret = max * SQRTF(1 + t*t); if (min == 0) ret = max; if (xisnanf(x) || xisnanf(y)) ret = SLEEF_NANf; if (x == SLEEF_INFINITYf || y == SLEEF_INFINITYf) ret = SLEEF_INFINITYf; return ret; } static INLINE CONST float toward0f(float d) { return d == 0 ? 0 : intBitsToFloat(floatToRawIntBits(d)-1); } static INLINE CONST float ptruncf(float x) { return fabsfk(x) >= (float)(1LL << 23) ? 
x : (x - (x - (int32_t)x)); } EXPORT CONST float xfmodf(float x, float y) { float nu = fabsfk(x), de = fabsfk(y), s = 1, q; if (de < FLT_MIN) { nu *= 1ULL << 25; de *= 1ULL << 25; s = 1.0f / (1ULL << 25); } Sleef_float2 r = df(nu, 0); float rde = toward0f(1.0f / de); for(int i=0;i<8;i++) { // ceil(log2(FLT_MAX) / 22)+1 q = (de+de > r.x && r.x >= de) ? 1.0f : (toward0f(r.x) * rde); r = dfnormalize_f2_f2(dfadd2_f2_f2_f2(r, dfmul_f2_f_f(ptruncf(q), -de))); if (r.x < de) break; } float ret = (r.x + r.y) * s; if (r.x + r.y == de) ret = 0; ret = mulsignf(ret, x); if (nu < de) ret = x; if (de == 0) ret = SLEEF_NANf; return ret; } EXPORT CONST float xsqrtf_u05(float d) { float q = 0.5f; d = d < 0 ? SLEEF_NANf : d; if (d < 5.2939559203393770e-23f) { d *= 1.8889465931478580e+22f; q = 7.2759576141834260e-12f * 0.5f; } if (d > 1.8446744073709552e+19f) { d *= 5.4210108624275220e-20f; q = 4294967296.0f * 0.5f; } // http://en.wikipedia.org/wiki/Fast_inverse_square_root float x = intBitsToFloat(0x5f375a86 - (floatToRawIntBits(d + 1e-45f) >> 1)); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x) * d; Sleef_float2 d2 = dfmul_f2_f2_f2(dfadd2_f2_f_f2(d, dfmul_f2_f_f(x, x)), dfrec_f2_f(x)); float ret = (d2.x + d2.y) * q; ret = d == SLEEF_INFINITYf ? SLEEF_INFINITYf : ret; ret = d == 0 ? d : ret; return ret; } EXPORT CONST float xsqrtf_u35(float d) { float q = 1.0f; d = d < 0 ? SLEEF_NANf : d; if (d < 5.2939559203393770e-23f) { d *= 1.8889465931478580e+22f; q = 7.2759576141834260e-12f; } if (d > 1.8446744073709552e+19f) { d *= 5.4210108624275220e-20f; q = 4294967296.0f; } // http://en.wikipedia.org/wiki/Fast_inverse_square_root float x = intBitsToFloat(0x5f375a86 - (floatToRawIntBits(d + 1e-45) >> 1)); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x); x = x * (1.5f - 0.5f * d * x * x); return d == SLEEF_INFINITYf ? 
SLEEF_INFINITYf : (x * d * q); } EXPORT CONST float xsqrtf(float d) { return SQRTF(d); } EXPORT CONST float xfmaf(float x, float y, float z) { float h2 = x * y + z, q = 1; if (fabsfk(h2) < 1e-38f) { const float c0 = 1 << 25, c1 = c0 * c0, c2 = c1 * c1; x *= c1; y *= c1; z *= c2; q = 1.0f / c2; } if (fabsfk(h2) > 1e+38f) { const float c0 = 1 << 25, c1 = c0 * c0, c2 = c1 * c1; x *= 1.0 / c1; y *= 1.0 / c1; z *= 1.0 / c2; q = c2; } Sleef_float2 d = dfmul_f2_f_f(x, y); d = dfadd2_f2_f2_f(d, z); float ret = (x == 0 || y == 0) ? z : (d.x + d.y); if (xisinff(z) && !xisinff(x) && !xisnanf(x) && !xisinff(y) && !xisnanf(y)) h2 = z; return (xisinff(h2) || xisnanf(h2)) ? h2 : ret*q; } // static INLINE CONST Sleef_float2 sinpifk(float d) { float u, s, t; Sleef_float2 x, s2; u = d * 4; int q = ceilfk(u) & ~1; int o = (q & 2) != 0; s = u - (float)q; t = s; s = s * s; s2 = dfmul_f2_f_f(t, t); // u = o ? -0.2430611801e-7f : +0.3093842054e-6f; u = mlaf(u, s, o ? +0.3590577080e-5f : -0.3657307388e-4f); u = mlaf(u, s, o ? -0.3259917721e-3f : +0.2490393585e-2f); x = dfadd2_f2_f_f2(u * s, o ? df(0.015854343771934509277, 4.4940051354032242811e-10) : df(-0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), o ? df(-0.30842512845993041992, -9.0728339030733922277e-09) : df(0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_f2_f2_f2(x, o ? s2 : df(t, 0)); x = o ? dfadd2_f2_f2_f(x, 1) : x; // if ((q & 4) != 0) { x.x = -x.x; x.y = -x.y; } return x; } EXPORT CONST float xsinpif_u05(float d) { Sleef_float2 x = sinpifk(d); float r = x.x + x.y; if (xisnegzerof(d)) r = -0.0; if (fabsfk(d) > TRIGRANGEMAX4f) r = 0; if (xisinff(d)) r = SLEEF_NANf; return r; } static INLINE CONST Sleef_float2 cospifk(float d) { float u, s, t; Sleef_float2 x, s2; u = d * 4; int q = ceilfk(u) & ~1; int o = (q & 2) == 0; s = u - (float)q; t = s; s = s * s; s2 = dfmul_f2_f_f(t, t); // u = o ? -0.2430611801e-7f : +0.3093842054e-6f; u = mlaf(u, s, o ? 
+0.3590577080e-5f : -0.3657307388e-4f); u = mlaf(u, s, o ? -0.3259917721e-3f : +0.2490393585e-2f); x = dfadd2_f2_f_f2(u * s, o ? df(0.015854343771934509277, 4.4940051354032242811e-10) : df(-0.080745510756969451904, -1.3373665339076936258e-09)); x = dfadd2_f2_f2_f2(dfmul_f2_f2_f2(s2, x), o ? df(-0.30842512845993041992, -9.0728339030733922277e-09) : df(0.78539818525314331055, -2.1857338617566484855e-08)); x = dfmul_f2_f2_f2(x, o ? s2 : df(t, 0)); x = o ? dfadd2_f2_f2_f(x, 1) : x; // if (((q+2) & 4) != 0) { x.x = -x.x; x.y = -x.y; } return x; } EXPORT CONST float xcospif_u05(float d) { Sleef_float2 x = cospifk(d); float r = x.x + x.y; if (fabsfk(d) > TRIGRANGEMAX4f) r = 1; if (xisinff(d)) r = SLEEF_NANf; return r; } typedef struct { Sleef_float2 a, b; } df2; static CONST df2 gammafk(float a) { Sleef_float2 clc = df(0, 0), clln = df(1, 0), clld = df(1, 0), v = df(1, 0), x, y, z; float t, u; int otiny = fabsfk(a) < 1e-30f, oref = a < 0.5f; x = otiny ? df(0, 0) : (oref ? dfadd2_f2_f_f(1, -a) : df(a, 0)); int o0 = (0.5f <= x.x && x.x <= 1.2), o2 = 2.3 < x.x; y = dfnormalize_f2_f2(dfmul_f2_f2_f2(dfadd2_f2_f2_f(x, 1), x)); y = dfnormalize_f2_f2(dfmul_f2_f2_f2(dfadd2_f2_f2_f(x, 2), y)); clln = (o2 && x.x <= 7) ? y : clln; x = (o2 && x.x <= 7) ? dfadd2_f2_f2_f(x, 3) : x; t = o2 ? (1.0 / x.x) : dfnormalize_f2_f2(dfadd2_f2_f2_f(x, o0 ? -1 : -2)).x; u = o2 ? +0.000839498720672087279971000786 : (o0 ? +0.9435157776e+0f : +0.1102489550e-3f); u = mlaf(u, t, o2 ? -5.17179090826059219329394422e-05 : (o0 ? +0.8670063615e+0f : +0.8160019934e-4f)); u = mlaf(u, t, o2 ? -0.000592166437353693882857342347 : (o0 ? +0.4826702476e+0f : +0.1528468856e-3f)); u = mlaf(u, t, o2 ? +6.97281375836585777403743539e-05 : (o0 ? -0.8855129778e-1f : -0.2355068718e-3f)); u = mlaf(u, t, o2 ? +0.000784039221720066627493314301 : (o0 ? +0.1013825238e+0f : +0.4962242092e-3f)); u = mlaf(u, t, o2 ? -0.000229472093621399176949318732 : (o0 ? -0.1493408978e+0f : -0.1193488017e-2f)); u = mlaf(u, t, o2 ? 
-0.002681327160493827160473958490 : (o0 ? +0.1697509140e+0f : +0.2891599433e-2f)); u = mlaf(u, t, o2 ? +0.003472222222222222222175164840 : (o0 ? -0.2072454542e+0f : -0.7385451812e-2f)); u = mlaf(u, t, o2 ? +0.083333333333333333335592087900 : (o0 ? +0.2705872357e+0f : +0.2058077045e-1f)); y = dfmul_f2_f2_f2(dfadd2_f2_f2_f(x, -0.5), logk2f(x)); y = dfadd2_f2_f2_f2(y, dfneg_f2_f2(x)); y = dfadd2_f2_f2_f2(y, dfx(0.91893853320467278056)); // 0.5*log(2*M_PI) z = dfadd2_f2_f2_f(dfmul_f2_f_f (u, t), o0 ? -0.400686534596170958447352690395e+0f : -0.673523028297382446749257758235e-1f); z = dfadd2_f2_f2_f(dfmul_f2_f2_f(z, t), o0 ? +0.822466960142643054450325495997e+0f : +0.322467033928981157743538726901e+0f); z = dfadd2_f2_f2_f(dfmul_f2_f2_f(z, t), o0 ? -0.577215665946766039837398973297e+0f : +0.422784335087484338986941629852e+0f); z = dfmul_f2_f2_f(z, t); clc = o2 ? y : z; clld = o2 ? dfadd2_f2_f2_f(dfmul_f2_f_f(u, t), 1) : clld; y = clln; clc = otiny ? dfx(41.58883083359671856503) : // log(2^60) (oref ? dfadd2_f2_f2_f2(dfx(1.1447298858494001639), dfneg_f2_f2(clc)) : clc); // log(M_PI) clln = otiny ? df(1, 0) : (oref ? clln : clld); if (oref) x = dfmul_f2_f2_f2(clld, sinpifk(a - (float)(1LL << 12) * (int32_t)(a * (1.0 / (1LL << 12))))); clld = otiny ? df(a*((1LL << 30)*(float)(1LL << 30)), 0) : (oref ? x : y); df2 ret = { clc, dfdiv_f2_f2_f2(clln, clld) }; return ret; } EXPORT CONST float xtgammaf_u1(float a) { df2 d = gammafk(a); Sleef_float2 y = dfmul_f2_f2_f2(expk2f(d.a), d.b); float r = y.x + y.y; r = (a == -SLEEF_INFINITYf || (a < 0 && xisintf(a)) || (xisnumberf(a) && a < 0 && xisnanf(r))) ? SLEEF_NANf : r; r = ((a == SLEEF_INFINITYf || xisnumberf(a)) && a >= -FLT_MIN && (a == 0 || a > 36 || xisnanf(r))) ? 
mulsignf(SLEEF_INFINITYf, a) : r; return r; } EXPORT CONST float xlgammaf_u1(float a) { df2 d = gammafk(a); Sleef_float2 y = dfadd2_f2_f2_f2(d.a, logk2f(dfabs_f2_f2(d.b))); float r = y.x + y.y; r = (xisinff(a) || (a <= 0 && xisintf(a)) || (xisnumberf(a) && xisnanf(r))) ? SLEEF_INFINITYf : r; return r; } EXPORT CONST float xerff_u1(float a) { float s = a, t, u; Sleef_float2 d; a = fabsfk(a); int o0 = a < 1.1f, o1 = a < 2.4f, o2 = a < 4.0f; u = o0 ? (a*a) : a; t = o0 ? +0.7089292194e-4f : o1 ? -0.1792667899e-4f : -0.9495757695e-5f; t = mlaf(t, u, o0 ? -0.7768311189e-3f : o1 ? +0.3937633010e-3f : +0.2481465926e-3f); t = mlaf(t, u, o0 ? +0.5159463733e-2f : o1 ? -0.3949181177e-2f : -0.2918176819e-2f); t = mlaf(t, u, o0 ? -0.2683781274e-1f : o1 ? +0.2445474640e-1f : +0.2059706673e-1f); t = mlaf(t, u, o0 ? +0.1128318012e+0f : o1 ? -0.1070996150e+0f : -0.9901899844e-1f); d = dfmul_f2_f_f(t, u); d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.376125876000657465175213237214e+0) : o1 ? dfx(-0.634588905908410389971210809210e+0) : dfx(-0.643598050547891613081201721633e+0)); d = dfmul_f2_f2_f(d, u); d = dfadd2_f2_f2_f2(d, o0 ? dfx(+0.112837916021059138255978217023e+1) : o1 ? dfx(-0.112879855826694507209862753992e+1) : dfx(-0.112461487742845562801052956293e+1)); d = dfmul_f2_f2_f(d, a); d = o0 ? d : dfadd_f2_f_f2(1.0, dfneg_f2_f2(expk2f(d))); u = mulsignf(o2 ? (d.x + d.y) : 1, s); u = xisnanf(a) ? SLEEF_NANf : u; return u; } EXPORT CONST float xerfcf_u15(float a) { float s = a, r = 0, t; Sleef_float2 u, d, x; a = fabsfk(a); int o0 = a < 1.0f, o1 = a < 2.2f, o2 = a < 4.3f, o3 = a < 10.1f; u = o1 ? df(a, 0) : dfdiv_f2_f2_f2(df(1, 0), df(a, 0)); t = o0 ? -0.8638041618e-4f : o1 ? -0.6236977242e-5f : o2 ? -0.3869504035e+0f : +0.1115344167e+1f; t = mlaf(t, u.x, o0 ? +0.6000166177e-3f : o1 ? +0.5749821503e-4f : o2 ? +0.1288077235e+1f : -0.9454904199e+0f); t = mlaf(t, u.x, o0 ? -0.1665703603e-2f : o1 ? +0.6002851478e-5f : o2 ? -0.1816803217e+1f : -0.3667259514e+0f); t = mlaf(t, u.x, o0 ? 
+0.1795156277e-3f : o1 ? -0.2851036377e-2f : o2 ? +0.1249150872e+1f : +0.7155663371e+0f); t = mlaf(t, u.x, o0 ? +0.1914106123e-1f : o1 ? +0.2260518074e-1f : o2 ? -0.1328857988e+0f : -0.1262947265e-1f); d = dfmul_f2_f2_f(u, t); d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.102775359343930288081655368891e+0) : o1 ? dfx(-0.105247583459338632253369014063e+0) : o2 ? dfx(-0.482365310333045318680618892669e+0) : dfx(-0.498961546254537647970305302739e+0)); d = dfmul_f2_f2_f2(d, u); d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.636619483208481931303752546439e+0) : o1 ? dfx(-0.635609463574589034216723775292e+0) : o2 ? dfx(-0.134450203224533979217859332703e-2) : dfx(-0.471199543422848492080722832666e-4)); d = dfmul_f2_f2_f2(d, u); d = dfadd2_f2_f2_f2(d, o0 ? dfx(-0.112837917790537404939545770596e+1) : o1 ? dfx(-0.112855987376668622084547028949e+1) : o2 ? dfx(-0.572319781150472949561786101080e+0) : dfx(-0.572364030327966044425932623525e+0)); x = dfmul_f2_f2_f(o1 ? d : df(-a, 0), a); x = o1 ? x : dfadd2_f2_f2_f2(x, d); x = expk2f(x); x = o1 ? x : dfmul_f2_f2_f2(x, u); r = o3 ? (x.x + x.y) : 0; if (s < 0) r = 2 - r; r = xisnanf(s) ? 
SLEEF_NANf : r; return r; } // #ifdef ENABLE_MAIN // gcc -w -DENABLE_MAIN -I../common sleefsp.c -lm #include int main(int argc, char **argv) { float d1 = atof(argv[1]); //float d2 = atof(argv[2]); //float d3 = atof(argv[3]); //printf("%.20g, %.20g\n", (double)d1, (double)d2); //float i2 = atoi(argv[2]); //float c = xatan2f_u1(d1, d2); //printf("round %.20g\n", (double)d1); printf("test = %.20g\n", (double)xsqrtf_u05(d1)); //printf("correct = %.20g\n", (double)roundf(d1)); //printf("rint %.20g\n", (double)d1); //printf("test = %.20g\n", (double)xrintf(d1)); //printf("correct = %.20g\n", (double)rintf(d1)); //Sleef_float2 r = xsincospif_u35(d); //printf("%g, %g\n", (double)r.x, (double)r.y); } #endif sleef-3.3.1/travis/000077500000000000000000000000001333715643700141405ustar00rootroot00000000000000sleef-3.3.1/travis/before_install.aarch64-gcc.sh000066400000000000000000000033761333715643700214560ustar00rootroot00000000000000#!/bin/bash set -ev export PATH=$PATH:/usr/bin dpkg --add-architecture arm64 cat /etc/apt/sources.list | sed -e 's/^deb /deb \[arch=amd64\] /g' -e 's/\[arch=amd64\] \[arch=amd64\]/\[arch=amd64\]/g' > /tmp/sources.list cat <> /tmp/sources.list deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb [arch=arm64] 
http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse EOF mv /tmp/sources.list /etc/apt/sources.list apt-get -qq update apt-get install -y git cmake gcc-aarch64-linux-gnu libc6-arm64-cross libc6:arm64 libmpfr-dev:arm64 libgomp1:arm64 libmpfr-dev binfmt-support qemu qemu-user-static libfftw3-dev:arm64 sleef-3.3.1/travis/before_install.armhf-gcc.sh000066400000000000000000000033551333715643700213200ustar00rootroot00000000000000#!/bin/bash set -ev export PATH=$PATH:/usr/bin dpkg --add-architecture armhf cat /etc/apt/sources.list | sed -e 's/^deb /deb \[arch=amd64\] /g' -e 's/\[arch=amd64\] \[arch=amd64\]/\[arch=amd64\]/g' > /tmp/sources.list cat <> /tmp/sources.list deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ 
CODENAME-security main restricted deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse EOF mv /tmp/sources.list /etc/apt/sources.list apt-get -qq update apt-get install -y git cmake gcc-arm-linux-gnueabihf libc6-armhf-cross libc6:armhf libmpfr-dev:armhf libgomp1:armhf libmpfr-dev binfmt-support qemu qemu-user-static sleef-3.3.1/travis/before_install.osx-clang.sh000066400000000000000000000000401333715643700213500ustar00rootroot00000000000000#!/bin/bash set -ev brew update sleef-3.3.1/travis/before_install.osx-gcc.sh000066400000000000000000000000631333715643700210250ustar00rootroot00000000000000#!/bin/bash set -ev brew update brew install gcc@6 sleef-3.3.1/travis/before_install.ppc64el-clang.sh000066400000000000000000000034541333715643700220300ustar00rootroot00000000000000#!/bin/bash set -ev export PATH=$PATH:/usr/bin dpkg --add-architecture ppc64el cat /etc/apt/sources.list | sed -e 's/^deb /deb \[arch=amd64\] /g' -e 's/\[arch=amd64\] \[arch=amd64\]/\[arch=amd64\]/g' > /tmp/sources.list cat <> /tmp/sources.list deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME main restricted deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates main restricted deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME universe deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-updates universe deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted 
deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-backports main restricted deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security main restricted deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security universe deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse deb-src http://ports.ubuntu.com/ubuntu-ports/ CODENAME-security multiverse EOF mv /tmp/sources.list /etc/apt/sources.list apt-get -qq update apt-get install -y git cmake clang-5.0 gcc-powerpc64le-linux-gnu libc6-ppc64el-cross libc6-dev:ppc64el libmpfr-dev:ppc64el libgomp1:ppc64el libmpfr-dev binfmt-support qemu qemu-user-static libfftw3-dev:ppc64el sleef-3.3.1/travis/before_install.x86_64-clang.sh000066400000000000000000000003611333715643700215030ustar00rootroot00000000000000#!/bin/bash set -ev #tar xf sde-external-8.12.0-2017-10-23-lin.tar.bz2 # Turned off SDE to reduce time for testing sudo add-apt-repository -y ppa:adrozdoff/cmake sudo apt-get -qq update sudo apt-get install -y cmake libmpfr-dev libfftw3-dev sleef-3.3.1/travis/before_install.x86_64-gcc.sh000066400000000000000000000002671333715643700211600ustar00rootroot00000000000000#!/bin/bash set -ev tar xf sde-external-8.12.0-2017-10-23-lin.tar.bz2 sudo add-apt-repository -y ppa:adrozdoff/cmake sudo apt-get -qq update sudo apt-get install -y cmake libmpfr-dev sleef-3.3.1/travis/before_script.aarch64-gcc.sh000066400000000000000000000005401333715643700213020ustar00rootroot00000000000000#!/bin/bash set -ev cd /build mkdir build-native cd build-native cmake .. 
make -j 2 all cd /build mkdir build-cross cd build-cross cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-aarch64.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-aarch64-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. sleef-3.3.1/travis/before_script.armhf-gcc.sh000066400000000000000000000005321333715643700211500ustar00rootroot00000000000000#!/bin/bash set -ev cd /build mkdir build-native cd build-native cmake .. make -j 2 all cd /build mkdir build-cross cd build-cross cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-armhf.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-arm-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. sleef-3.3.1/travis/before_script.osx-clang.sh000066400000000000000000000002211333715643700212070ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. sleef-3.3.1/travis/before_script.osx-gcc.sh000066400000000000000000000002411333715643700206610ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=gcc-6 cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. sleef-3.3.1/travis/before_script.ppc64el-clang.sh000066400000000000000000000010571333715643700216630ustar00rootroot00000000000000#!/bin/bash set -ev cd /build mkdir build-native cd build-native cmake .. make -j 2 all cd /build mkdir bin cat < /build/bin/ppc64el-cc #!/bin/sh clang-5.0 -target ppc64le-linux-gnu -mvsx -fuse-ld=/usr/powerpc64le-linux-gnu/bin/ld \$* EOF chmod +x /build/bin/ppc64el-cc export PATH=$PATH:/build/bin mkdir build-cross cd build-cross cmake -DCMAKE_TOOLCHAIN_FILE=../travis/toolchain-ppc64el.cmake -DNATIVE_BUILD_DIR=`pwd`/../build-native -DEMULATOR=qemu-ppc64le-static -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. 
sleef-3.3.1/travis/before_script.x86_64-clang.sh000066400000000000000000000002741333715643700213440ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=clang-5.0 export CXX=clang++-5.0 cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. sleef-3.3.1/travis/before_script.x86_64-gcc.sh000066400000000000000000000002621333715643700210110ustar00rootroot00000000000000#!/bin/bash set -ev mkdir sleef.build cd sleef.build export CC=gcc-7 export CXX=g++-7 cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=../install -DSLEEF_SHOW_CONFIG=1 .. sleef-3.3.1/travis/ppc64el-cc000066400000000000000000000001421333715643700157200ustar00rootroot00000000000000#!/bin/sh clang-5.0 -target ppc64le-linux-gnu -mvsx -fuse-ld=/usr/powerpc64le-linux-gnu/bin/ld $* sleef-3.3.1/travis/script.aarch64-gcc.sh000066400000000000000000000001641333715643700177620ustar00rootroot00000000000000#!/bin/bash set -ev cd /build/build-cross make -j 2 all export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 make install sleef-3.3.1/travis/script.armhf-gcc.sh000066400000000000000000000001641333715643700176270ustar00rootroot00000000000000#!/bin/bash set -ev cd /build/build-cross make -j 2 all export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 make install sleef-3.3.1/travis/script.osx-clang.sh000066400000000000000000000001551333715643700176730ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build make -j 2 all export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 make install sleef-3.3.1/travis/script.osx-gcc.sh000066400000000000000000000001551333715643700173430ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build make -j 2 all export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 make install sleef-3.3.1/travis/script.ppc64el-clang.sh000066400000000000000000000002131333715643700203320ustar00rootroot00000000000000#!/bin/bash set -ev export QEMU_CPU=POWER8 cd /build/build-cross make -j 2 all export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 
make install sleef-3.3.1/travis/script.x86_64-clang.sh000066400000000000000000000001551333715643700200200ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build make -j 2 all export CTEST_OUTPUT_ON_FAILURE=TRUE ctest -j 2 make install sleef-3.3.1/travis/script.x86_64-gcc.sh000066400000000000000000000001671333715643700174730ustar00rootroot00000000000000#!/bin/bash set -ev cd sleef.build make -j 2 all export CTEST_OUTPUT_ON_FAILURE=TRUE ctest --verbose -j 2 make install sleef-3.3.1/travis/setupdocker.sh000066400000000000000000000004531333715643700170260ustar00rootroot00000000000000#!/bin/bash set -ev docker pull ubuntu:xenial; docker run -d --name xenial -dti ubuntu:xenial bash; tar cfz /tmp/builddir.tgz . docker cp /tmp/builddir.tgz xenial:/tmp/ docker exec xenial mkdir /build docker exec xenial tar xfz /tmp/builddir.tgz -C /build docker exec xenial rm -f /tmp/builddir.tgz sleef-3.3.1/travis/toolchain-aarch64.cmake000066400000000000000000000007671333715643700203620ustar00rootroot00000000000000SET (CMAKE_CROSSCOMPILING TRUE) SET (CMAKE_SYSTEM_NAME "Linux") SET (CMAKE_SYSTEM_PROCESSOR "aarch64") SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu /usr/include/aarch64-linux-gnu /usr/lib/aarch64-linux-gnu) find_program(CMAKE_C_COMPILER aarch64-linux-gnu-gcc aarch64-linux-gnu-gcc-8 aarch64-linux-gnu-gcc-7 aarch64-linux-gnu-gcc-6 aarch64-linux-gnu-gcc-5) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) sleef-3.3.1/travis/toolchain-armhf.cmake000066400000000000000000000010051333715643700202110ustar00rootroot00000000000000SET (CMAKE_CROSSCOMPILING TRUE) SET (CMAKE_SYSTEM_NAME "Linux") SET (CMAKE_SYSTEM_PROCESSOR "armhf") SET(CMAKE_FIND_ROOT_PATH /usr/arm-linux-gnueabihf /usr/include/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabihf) find_program(CMAKE_C_COMPILER arm-linux-gnueabihf-gcc arm-linux-gnueabihf-gcc-8 arm-linux-gnueabihf-gcc-7 arm-linux-gnueabihf-gcc-6 arm-linux-gnueabihf-gcc-5) 
SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) sleef-3.3.1/travis/toolchain-ppc64el.cmake000066400000000000000000000006261333715643700204010ustar00rootroot00000000000000SET (CMAKE_CROSSCOMPILING TRUE) SET (CMAKE_SYSTEM_NAME "Linux") SET (CMAKE_SYSTEM_PROCESSOR "ppc64") SET(CMAKE_FIND_ROOT_PATH /usr/powerpc64le-linux-gnu /usr/include/powerpc64le-linux-gnu /usr/lib/powerpc64le-linux-gnu) find_program(CMAKE_C_COMPILER ppc64el-cc) SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)